import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
from matplotlib.pyplot import show
# Load the training data from a hard-coded absolute path on the D: drive.
df1=pd.read_csv('d:/training_set.csv')
# Bare expression: shows the frame when run as a notebook cell.
df1
| Id | MSSubClass | MSZoning | LotFrontage | LotArea | Street | Alley | LotShape | LandContour | Utilities | ... | PoolArea | PoolQC | Fence | MiscFeature | MiscVal | MoSold | YrSold | SaleType | SaleCondition | SalePrice | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 60 | RL | 65.0 | 8450 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 2 | 2008 | WD | Normal | 208500 |
| 1 | 2 | 20 | RL | 80.0 | 9600 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 5 | 2007 | WD | Normal | 181500 |
| 2 | 3 | 60 | RL | 68.0 | 11250 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 9 | 2008 | WD | Normal | 223500 |
| 3 | 4 | 70 | RL | 60.0 | 9550 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 2 | 2006 | WD | Abnorml | 140000 |
| 4 | 5 | 60 | RL | 84.0 | 14260 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 12 | 2008 | WD | Normal | 250000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1455 | 1456 | 60 | RL | 62.0 | 7917 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 8 | 2007 | WD | Normal | 175000 |
| 1456 | 1457 | 20 | RL | 85.0 | 13175 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | MnPrv | NaN | 0 | 2 | 2010 | WD | Normal | 210000 |
| 1457 | 1458 | 70 | RL | 66.0 | 9042 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | GdPrv | Shed | 2500 | 5 | 2010 | WD | Normal | 266500 |
| 1458 | 1459 | 20 | RL | 68.0 | 9717 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 4 | 2010 | WD | Normal | 142125 |
| 1459 | 1460 | 20 | RL | 75.0 | 9937 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | NaN | NaN | NaN | 0 | 6 | 2008 | WD | Normal | 147500 |
1460 rows × 81 columns
# Print per-column non-null counts and dtypes to spot columns with missing values.
df1.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1460 entries, 0 to 1459 Data columns (total 81 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Id 1460 non-null int64 1 MSSubClass 1460 non-null int64 2 MSZoning 1460 non-null object 3 LotFrontage 1201 non-null float64 4 LotArea 1460 non-null int64 5 Street 1460 non-null object 6 Alley 91 non-null object 7 LotShape 1460 non-null object 8 LandContour 1460 non-null object 9 Utilities 1460 non-null object 10 LotConfig 1460 non-null object 11 LandSlope 1460 non-null object 12 Neighborhood 1460 non-null object 13 Condition1 1460 non-null object 14 Condition2 1460 non-null object 15 BldgType 1460 non-null object 16 HouseStyle 1460 non-null object 17 OverallQual 1460 non-null int64 18 OverallCond 1460 non-null int64 19 YearBuilt 1460 non-null int64 20 YearRemodAdd 1460 non-null int64 21 RoofStyle 1460 non-null object 22 RoofMatl 1460 non-null object 23 Exterior1st 1460 non-null object 24 Exterior2nd 1460 non-null object 25 MasVnrType 1452 non-null object 26 MasVnrArea 1452 non-null float64 27 ExterQual 1460 non-null object 28 ExterCond 1460 non-null object 29 Foundation 1460 non-null object 30 BsmtQual 1423 non-null object 31 BsmtCond 1423 non-null object 32 BsmtExposure 1422 non-null object 33 BsmtFinType1 1423 non-null object 34 BsmtFinSF1 1460 non-null int64 35 BsmtFinType2 1422 non-null object 36 BsmtFinSF2 1460 non-null int64 37 BsmtUnfSF 1460 non-null int64 38 TotalBsmtSF 1460 non-null int64 39 Heating 1460 non-null object 40 HeatingQC 1460 non-null object 41 CentralAir 1460 non-null object 42 Electrical 1459 non-null object 43 1stFlrSF 1460 non-null int64 44 2ndFlrSF 1460 non-null int64 45 LowQualFinSF 1460 non-null int64 46 GrLivArea 1460 non-null int64 47 BsmtFullBath 1460 non-null int64 48 BsmtHalfBath 1460 non-null int64 49 FullBath 1460 non-null int64 50 HalfBath 1460 non-null int64 51 BedroomAbvGr 1460 non-null int64 52 KitchenAbvGr 1460 non-null int64 53 KitchenQual 1460 non-null 
object 54 TotRmsAbvGrd 1460 non-null int64 55 Functional 1460 non-null object 56 Fireplaces 1460 non-null int64 57 FireplaceQu 770 non-null object 58 GarageType 1379 non-null object 59 GarageYrBlt 1379 non-null float64 60 GarageFinish 1379 non-null object 61 GarageCars 1460 non-null int64 62 GarageArea 1460 non-null int64 63 GarageQual 1379 non-null object 64 GarageCond 1379 non-null object 65 PavedDrive 1460 non-null object 66 WoodDeckSF 1460 non-null int64 67 OpenPorchSF 1460 non-null int64 68 EnclosedPorch 1460 non-null int64 69 3SsnPorch 1460 non-null int64 70 ScreenPorch 1460 non-null int64 71 PoolArea 1460 non-null int64 72 PoolQC 7 non-null object 73 Fence 281 non-null object 74 MiscFeature 54 non-null object 75 MiscVal 1460 non-null int64 76 MoSold 1460 non-null int64 77 YrSold 1460 non-null int64 78 SaleType 1460 non-null object 79 SaleCondition 1460 non-null object 80 SalePrice 1460 non-null int64 dtypes: float64(3), int64(35), object(43) memory usage: 924.0+ KB
# Frequency of each SaleCondition category.
df1['SaleCondition'].value_counts()
Normal 1198 Partial 125 Abnorml 101 Family 20 Alloca 12 AdjLand 4 Name: SaleCondition, dtype: int64
# (rows, columns) of the raw frame.
df1.shape
(1460, 81)
# Number of missing values in each column, before imputation.
df1.isna().sum()
Id 0
MSSubClass 0
MSZoning 0
LotFrontage 259
LotArea 0
...
MoSold 0
YrSold 0
SaleType 0
SaleCondition 0
SalePrice 0
Length: 81, dtype: int64
# Fill missing values column by column: object columns get their most
# frequent value, every other column gets its mean.
for col_name in df1.columns[df1.isna().any()]:
    column = df1[col_name]
    fill_value = column.mode()[0] if column.dtypes == 'object' else column.mean()
    df1[col_name] = column.fillna(fill_value)

# Re-check the per-column missing counts after filling.
df1.isna().sum()
Id 0
MSSubClass 0
MSZoning 0
LotFrontage 0
LotArea 0
..
MoSold 0
YrSold 0
SaleType 0
SaleCondition 0
SalePrice 0
Length: 81, dtype: int64
# Predictors: every column except the target and three columns the analysis
# leaves out.
X = df1.drop(
    columns=[
        'SalePrice',
        'Id',
        'LowQualFinSF',
        'MiscVal',
    ]
)
# Target column.
Y = df1['SalePrice']
X.shape
(1460, 77)
# Length of the target vector; should match the row count of X.
Y.shape
(1460,)
# Partition the predictor names by dtype, keeping the original column order:
# object columns are treated as categorical, everything else as continuous.
cat = [name for name in X.columns if X[name].dtypes == 'object']
con = [name for name in X.columns if X[name].dtypes != 'object']
print(cat)
print(con)
['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition'] ['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MoSold', 'YrSold']
# Literal column-name lists that overwrite `cat` and `con`.
# NOTE(review): these appear to be pasted from the printed output of the
# dtype-based split; they will go stale if the columns of X change.
cat=['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood',
'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating',
'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish',
'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition']
con=['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea',
'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'BsmtFullBath',
'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt',
'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea',
'MoSold', 'YrSold']
# Univariate view of every column, one figure each: a bar chart of value
# counts for object columns, a histogram with a KDE overlay otherwise.
for column in df1.columns:
    values = df1[column]
    if values.dtypes == 'object':
        values.value_counts().plot(kind='bar')
    else:
        sb.histplot(data=df1, x=values, kde=True)
    # Both branches finish the figure the same way.
    plt.show()
# Relationship of every column with SalePrice, one figure each: box plots
# per category for object columns, labelled scatter plots otherwise.
for column in df1.columns:
    values = df1[column]
    if values.dtypes == 'object':
        sb.boxplot(data=df1, x=values, y='SalePrice')
    else:
        plt.scatter(data=df1, x=values, y='SalePrice')
        plt.xlabel(column)
        plt.ylabel('SalePrice')
        plt.title(f'{column} vs SalePrice')
    # Both branches finish the figure the same way.
    plt.show()
# Pairwise correlations between the numeric columns of df1.
# numeric_only is passed explicitly: relying on its default is deprecated
# (pandas emits a FutureWarning), and df1 still contains object columns
# that cannot take part in a correlation.
a = df1.corr(numeric_only=True)
a
C:\Users\Samir\AppData\Local\Temp\ipykernel_11084\2980587853.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. a=df1.corr()
| Id | MSSubClass | LotFrontage | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | ... | WoodDeckSF | OpenPorchSF | EnclosedPorch | 3SsnPorch | ScreenPorch | PoolArea | MiscVal | MoSold | YrSold | SalePrice | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Id | 1.000000 | 0.011156 | -0.009601 | -0.033226 | -0.028365 | 0.012609 | -0.012713 | -0.021998 | -0.050199 | -0.005024 | ... | -0.029643 | -0.000477 | 0.002889 | -0.046635 | 0.001330 | 0.057044 | -0.006242 | 0.021172 | 0.000712 | -0.021917 |
| MSSubClass | 0.011156 | 1.000000 | -0.357056 | -0.139781 | 0.032628 | -0.059316 | 0.027850 | 0.040581 | 0.022895 | -0.069836 | ... | -0.012579 | -0.006100 | -0.012037 | -0.043825 | -0.026030 | 0.008283 | -0.007683 | -0.013585 | -0.021407 | -0.084284 |
| LotFrontage | -0.009601 | -0.357056 | 1.000000 | 0.306795 | 0.234196 | -0.052820 | 0.117598 | 0.082746 | 0.179283 | 0.215828 | ... | 0.077106 | 0.137454 | 0.009790 | 0.062335 | 0.037684 | 0.180868 | 0.001168 | 0.010158 | 0.006768 | 0.334901 |
| LotArea | -0.033226 | -0.139781 | 0.306795 | 1.000000 | 0.105806 | -0.005636 | 0.014228 | 0.013788 | 0.103960 | 0.214103 | ... | 0.171698 | 0.084774 | -0.018340 | 0.020423 | 0.043160 | 0.077672 | 0.038068 | 0.001205 | -0.014261 | 0.263843 |
| OverallQual | -0.028365 | 0.032628 | 0.234196 | 0.105806 | 1.000000 | -0.091932 | 0.572323 | 0.550684 | 0.410238 | 0.239666 | ... | 0.238923 | 0.308819 | -0.113937 | 0.030371 | 0.064886 | 0.065166 | -0.031406 | 0.070815 | -0.027347 | 0.790982 |
| OverallCond | 0.012609 | -0.059316 | -0.052820 | -0.005636 | -0.091932 | 1.000000 | -0.375983 | 0.073741 | -0.127788 | -0.046231 | ... | -0.003334 | -0.032589 | 0.070356 | 0.025504 | 0.054811 | -0.001985 | 0.068777 | -0.003511 | 0.043950 | -0.077856 |
| YearBuilt | -0.012713 | 0.027850 | 0.117598 | 0.014228 | 0.572323 | -0.375983 | 1.000000 | 0.592855 | 0.314745 | 0.249503 | ... | 0.224880 | 0.188686 | -0.387268 | 0.031355 | -0.050364 | 0.004950 | -0.034383 | 0.012398 | -0.013618 | 0.522897 |
| YearRemodAdd | -0.021998 | 0.040581 | 0.082746 | 0.013788 | 0.550684 | 0.073741 | 0.592855 | 1.000000 | 0.179186 | 0.128451 | ... | 0.205726 | 0.226298 | -0.193919 | 0.045286 | -0.038740 | 0.005829 | -0.010286 | 0.021490 | 0.035743 | 0.507101 |
| MasVnrArea | -0.050199 | 0.022895 | 0.179283 | 0.103960 | 0.410238 | -0.127788 | 0.314745 | 0.179186 | 1.000000 | 0.263582 | ... | 0.159349 | 0.124965 | -0.109849 | 0.018795 | 0.061453 | 0.011723 | -0.029815 | -0.005940 | -0.008184 | 0.475241 |
| BsmtFinSF1 | -0.005024 | -0.069836 | 0.215828 | 0.214103 | 0.239666 | -0.046231 | 0.249503 | 0.128451 | 0.263582 | 1.000000 | ... | 0.204306 | 0.111761 | -0.102303 | 0.026451 | 0.062021 | 0.140491 | 0.003571 | -0.015727 | 0.014359 | 0.386420 |
| BsmtFinSF2 | -0.005968 | -0.065649 | 0.043340 | 0.111170 | -0.059119 | 0.040229 | -0.049107 | -0.067759 | -0.072302 | -0.050117 | ... | 0.067898 | 0.003093 | 0.036543 | -0.029993 | 0.088871 | 0.041709 | 0.004940 | -0.015211 | 0.031706 | -0.011378 |
| BsmtUnfSF | -0.007940 | -0.140759 | 0.122156 | -0.002618 | 0.308159 | -0.136841 | 0.149040 | 0.181133 | 0.114184 | -0.495251 | ... | -0.005316 | 0.129005 | -0.002538 | 0.020764 | -0.012579 | -0.035092 | -0.023837 | 0.034888 | -0.041258 | 0.214479 |
| TotalBsmtSF | -0.015415 | -0.238518 | 0.363358 | 0.260833 | 0.537808 | -0.171098 | 0.391452 | 0.291066 | 0.362452 | 0.522396 | ... | 0.232019 | 0.247264 | -0.095478 | 0.037384 | 0.084489 | 0.126053 | -0.018479 | 0.013196 | -0.014969 | 0.613581 |
| 1stFlrSF | 0.010496 | -0.251758 | 0.414266 | 0.299475 | 0.476224 | -0.144203 | 0.281986 | 0.240379 | 0.342160 | 0.445863 | ... | 0.235459 | 0.211671 | -0.065292 | 0.056104 | 0.088758 | 0.131525 | -0.021096 | 0.031372 | -0.013604 | 0.605852 |
| 2ndFlrSF | 0.005590 | 0.307886 | 0.072483 | 0.050986 | 0.295493 | 0.028942 | 0.010308 | 0.140024 | 0.174019 | -0.137079 | ... | 0.092165 | 0.208026 | 0.061989 | -0.024358 | 0.040606 | 0.081487 | 0.016197 | 0.035164 | -0.028700 | 0.319334 |
| LowQualFinSF | -0.044230 | 0.046474 | 0.036849 | 0.004779 | -0.030429 | 0.025494 | -0.183784 | -0.062419 | -0.069068 | -0.064503 | ... | -0.025444 | 0.018251 | 0.061081 | -0.004296 | 0.026799 | 0.062157 | -0.003793 | -0.022174 | -0.028921 | -0.025606 |
| GrLivArea | 0.008273 | 0.074853 | 0.368392 | 0.263116 | 0.593007 | -0.079686 | 0.199010 | 0.287389 | 0.389893 | 0.208171 | ... | 0.247433 | 0.330224 | 0.009113 | 0.020643 | 0.101510 | 0.170205 | -0.002416 | 0.050240 | -0.036526 | 0.708624 |
| BsmtFullBath | 0.002289 | 0.003491 | 0.091481 | 0.158155 | 0.111098 | -0.054942 | 0.187599 | 0.119470 | 0.085055 | 0.649212 | ... | 0.175315 | 0.067341 | -0.049911 | -0.000106 | 0.023148 | 0.067616 | -0.023047 | -0.025361 | 0.067049 | 0.227122 |
| BsmtHalfBath | -0.020155 | -0.002333 | -0.006419 | 0.048046 | -0.040150 | 0.117821 | -0.038162 | -0.012337 | 0.026669 | 0.067418 | ... | 0.040161 | -0.025324 | -0.008555 | 0.035114 | 0.032121 | 0.020025 | -0.007367 | 0.032873 | -0.046524 | -0.016844 |
| FullBath | 0.005587 | 0.131608 | 0.180424 | 0.126031 | 0.550600 | -0.194149 | 0.468271 | 0.439046 | 0.275730 | 0.058543 | ... | 0.187703 | 0.259977 | -0.115093 | 0.035353 | -0.008106 | 0.049604 | -0.014290 | 0.055872 | -0.019669 | 0.560664 |
| HalfBath | 0.006784 | 0.177354 | 0.048258 | 0.014259 | 0.273458 | -0.060769 | 0.242656 | 0.183331 | 0.200802 | 0.004262 | ... | 0.108080 | 0.199740 | -0.095317 | -0.004972 | 0.072426 | 0.022381 | 0.001290 | -0.009050 | -0.010269 | 0.284108 |
| BedroomAbvGr | 0.037719 | -0.023438 | 0.237023 | 0.119690 | 0.101676 | 0.012980 | -0.070651 | -0.040581 | 0.102417 | -0.107355 | ... | 0.046854 | 0.093810 | 0.041570 | -0.024478 | 0.044300 | 0.070703 | 0.007767 | 0.046544 | -0.036014 | 0.168213 |
| KitchenAbvGr | 0.002951 | 0.281721 | -0.005805 | -0.017784 | -0.183882 | -0.087001 | -0.174800 | -0.149598 | -0.037364 | -0.081007 | ... | -0.090130 | -0.070091 | 0.037312 | -0.024600 | -0.051613 | -0.014525 | 0.062341 | 0.026589 | 0.031687 | -0.135907 |
| TotRmsAbvGrd | 0.027239 | 0.040380 | 0.320146 | 0.190015 | 0.427452 | -0.057583 | 0.095589 | 0.191740 | 0.280027 | 0.044316 | ... | 0.165984 | 0.234192 | 0.004151 | -0.006683 | 0.059383 | 0.083757 | 0.024763 | 0.036907 | -0.034516 | 0.533723 |
| Fireplaces | -0.019772 | -0.045569 | 0.235755 | 0.271364 | 0.396765 | -0.023820 | 0.147716 | 0.112581 | 0.247906 | 0.260011 | ... | 0.200019 | 0.169405 | -0.024822 | 0.011257 | 0.184530 | 0.095074 | 0.001409 | 0.046357 | -0.024096 | 0.466929 |
| GarageYrBlt | 0.000070 | 0.080187 | 0.064324 | -0.024812 | 0.518018 | -0.306169 | 0.780555 | 0.618130 | 0.249367 | 0.150338 | ... | 0.220623 | 0.218490 | -0.285882 | 0.023534 | -0.075256 | -0.014499 | -0.031853 | 0.005173 | -0.000987 | 0.470177 |
| GarageCars | 0.016570 | -0.040110 | 0.269729 | 0.154871 | 0.600671 | -0.185758 | 0.537850 | 0.420622 | 0.363778 | 0.224054 | ... | 0.226342 | 0.213569 | -0.151434 | 0.035765 | 0.050494 | 0.020934 | -0.043080 | 0.040522 | -0.039117 | 0.640409 |
| GarageArea | 0.017634 | -0.098672 | 0.323663 | 0.180403 | 0.562022 | -0.151521 | 0.478954 | 0.371600 | 0.372567 | 0.296970 | ... | 0.224666 | 0.241435 | -0.121777 | 0.035087 | 0.051412 | 0.061047 | -0.027400 | 0.027974 | -0.027378 | 0.623431 |
| WoodDeckSF | -0.029643 | -0.012579 | 0.077106 | 0.171698 | 0.238923 | -0.003334 | 0.224880 | 0.205726 | 0.159349 | 0.204306 | ... | 1.000000 | 0.058661 | -0.125989 | -0.032771 | -0.074181 | 0.073378 | -0.009551 | 0.021011 | 0.022270 | 0.324413 |
| OpenPorchSF | -0.000477 | -0.006100 | 0.137454 | 0.084774 | 0.308819 | -0.032589 | 0.188686 | 0.226298 | 0.124965 | 0.111761 | ... | 0.058661 | 1.000000 | -0.093079 | -0.005842 | 0.074304 | 0.060762 | -0.018584 | 0.071255 | -0.057619 | 0.315856 |
| EnclosedPorch | 0.002889 | -0.012037 | 0.009790 | -0.018340 | -0.113937 | 0.070356 | -0.387268 | -0.193919 | -0.109849 | -0.102303 | ... | -0.125989 | -0.093079 | 1.000000 | -0.037305 | -0.082864 | 0.054203 | 0.018361 | -0.028887 | -0.009916 | -0.128578 |
| 3SsnPorch | -0.046635 | -0.043825 | 0.062335 | 0.020423 | 0.030371 | 0.025504 | 0.031355 | 0.045286 | 0.018795 | 0.026451 | ... | -0.032771 | -0.005842 | -0.037305 | 1.000000 | -0.031436 | -0.007992 | 0.000354 | 0.029474 | 0.018645 | 0.044584 |
| ScreenPorch | 0.001330 | -0.026030 | 0.037684 | 0.043160 | 0.064886 | 0.054811 | -0.050364 | -0.038740 | 0.061453 | 0.062021 | ... | -0.074181 | 0.074304 | -0.082864 | -0.031436 | 1.000000 | 0.051307 | 0.031946 | 0.023217 | 0.010694 | 0.111447 |
| PoolArea | 0.057044 | 0.008283 | 0.180868 | 0.077672 | 0.065166 | -0.001985 | 0.004950 | 0.005829 | 0.011723 | 0.140491 | ... | 0.073378 | 0.060762 | 0.054203 | -0.007992 | 0.051307 | 1.000000 | 0.029669 | -0.033737 | -0.059689 | 0.092404 |
| MiscVal | -0.006242 | -0.007683 | 0.001168 | 0.038068 | -0.031406 | 0.068777 | -0.034383 | -0.010286 | -0.029815 | 0.003571 | ... | -0.009551 | -0.018584 | 0.018361 | 0.000354 | 0.031946 | 0.029669 | 1.000000 | -0.006495 | 0.004906 | -0.021190 |
| MoSold | 0.021172 | -0.013585 | 0.010158 | 0.001205 | 0.070815 | -0.003511 | 0.012398 | 0.021490 | -0.005940 | -0.015727 | ... | 0.021011 | 0.071255 | -0.028887 | 0.029474 | 0.023217 | -0.033737 | -0.006495 | 1.000000 | -0.145721 | 0.046432 |
| YrSold | 0.000712 | -0.021407 | 0.006768 | -0.014261 | -0.027347 | 0.043950 | -0.013618 | 0.035743 | -0.008184 | 0.014359 | ... | 0.022270 | -0.057619 | -0.009916 | 0.018645 | 0.010694 | -0.059689 | 0.004906 | -0.145721 | 1.000000 | -0.028923 |
| SalePrice | -0.021917 | -0.084284 | 0.334901 | 0.263843 | 0.790982 | -0.077856 | 0.522897 | 0.507101 | 0.475241 | 0.386420 | ... | 0.324413 | 0.315856 | -0.128578 | 0.044584 | 0.111447 | 0.092404 | -0.021190 | 0.046432 | -0.028923 | 1.000000 |
38 rows × 38 columns
# Draw the correlation matrix `a` as a heatmap.
sb.heatmap(a)
<Axes: >
# Standardize the continuous predictors listed in `con`.
# NOTE(review): the scaler is fitted on every row of X, before the later
# train_test_split, so held-out rows contribute to the scaling statistics.
from sklearn.preprocessing import StandardScaler
ss=StandardScaler()
# The transformed values are wrapped in a new DataFrame with the `con`
# column names and a fresh default index.
X1=pd.DataFrame(ss.fit_transform(X[con]),columns=con)
X1
| MSSubClass | LotFrontage | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | BsmtFinSF2 | ... | GarageCars | GarageArea | WoodDeckSF | OpenPorchSF | EnclosedPorch | 3SsnPorch | ScreenPorch | PoolArea | MoSold | YrSold | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.073375 | -0.229372 | -0.207142 | 0.651479 | -0.517200 | 1.050994 | 0.878668 | 0.511418 | 0.575425 | -0.288653 | ... | 0.311725 | 0.351000 | -0.752176 | 0.216503 | -0.359325 | -0.116339 | -0.270208 | -0.068692 | -1.599111 | 0.138777 |
| 1 | -0.872563 | 0.451936 | -0.091886 | -0.071836 | 2.179628 | 0.156734 | -0.429577 | -0.574410 | 1.171992 | -0.288653 | ... | 0.311725 | -0.060731 | 1.626195 | -0.704483 | -0.359325 | -0.116339 | -0.270208 | -0.068692 | -0.489110 | -0.614439 |
| 2 | 0.073375 | -0.093110 | 0.073480 | 0.651479 | -0.517200 | 0.984752 | 0.830215 | 0.323060 | 0.092907 | -0.288653 | ... | 0.311725 | 0.631726 | -0.752176 | -0.070361 | -0.359325 | -0.116339 | -0.270208 | -0.068692 | 0.990891 | 0.138777 |
| 3 | 0.309859 | -0.456474 | -0.096897 | 0.651479 | -0.517200 | -1.863632 | -0.720298 | -0.574410 | -0.499274 | -0.288653 | ... | 1.650307 | 0.790804 | -0.752176 | -0.176048 | 4.092524 | -0.116339 | -0.270208 | -0.068692 | -1.599111 | -1.367655 |
| 4 | 0.073375 | 0.633618 | 0.375148 | 1.374795 | -0.517200 | 0.951632 | 0.733308 | 1.364570 | 0.463568 | -0.288653 | ... | 1.650307 | 1.698485 | 0.780197 | 0.563760 | -0.359325 | -0.116339 | -0.270208 | -0.068692 | 2.100892 | 0.138777 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1455 | 0.073375 | -0.365633 | -0.260560 | -0.071836 | -0.517200 | 0.918511 | 0.733308 | -0.574410 | -0.973018 | -0.288653 | ... | 0.311725 | -0.060731 | -0.752176 | -0.100558 | -0.359325 | -0.116339 | -0.270208 | -0.068692 | 0.620891 | -0.614439 |
| 1456 | -0.872563 | 0.679039 | 0.266407 | -0.071836 | 0.381743 | 0.222975 | 0.151865 | 0.084843 | 0.759659 | 0.722112 | ... | 0.311725 | 0.126420 | 2.033231 | -0.704483 | -0.359325 | -0.116339 | -0.270208 | -0.068692 | -1.599111 | 1.645210 |
| 1457 | 0.309859 | -0.183951 | -0.147810 | 0.651479 | 3.078570 | -1.002492 | 1.024029 | -0.574410 | -0.369871 | -0.288653 | ... | -1.026858 | -1.033914 | -0.752176 | 0.201405 | -0.359325 | -0.116339 | -0.270208 | -0.068692 | -0.489110 | 1.645210 |
| 1458 | -0.872563 | -0.093110 | -0.080160 | -0.795151 | 0.381743 | -0.704406 | 0.539493 | -0.574410 | -0.865548 | 6.092188 | ... | -1.026858 | -1.090059 | 2.168910 | -0.704483 | 1.473789 | -0.116339 | -0.270208 | -0.068692 | -0.859110 | 1.645210 |
| 1459 | -0.872563 | 0.224833 | -0.058112 | -0.795151 | 0.381743 | -0.207594 | -0.962566 | -0.574410 | 0.847389 | 1.509640 | ... | -1.026858 | -0.921624 | 5.121921 | 0.322190 | -0.359325 | -0.116339 | -0.270208 | -0.068692 | -0.119110 | 0.138777 |
1460 rows × 34 columns
# One-hot encode every categorical predictor listed in `cat`.
# dtype=int pins the indicators to 0/1 integers on every pandas version;
# newer pandas releases default to bool indicators, which may not combine
# cleanly with the float columns when the design matrix is handed to OLS.
X2 = pd.get_dummies(X[cat], dtype=int)
X2
| MSZoning_C (all) | MSZoning_FV | MSZoning_RH | MSZoning_RL | MSZoning_RM | Street_Grvl | Street_Pave | Alley_Grvl | Alley_Pave | LotShape_IR1 | ... | SaleType_ConLw | SaleType_New | SaleType_Oth | SaleType_WD | SaleCondition_Abnorml | SaleCondition_AdjLand | SaleCondition_Alloca | SaleCondition_Family | SaleCondition_Normal | SaleCondition_Partial | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 2 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 3 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
| 4 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1455 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1456 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1457 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1458 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1459 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
1460 rows × 252 columns
# Gather the row labels whose standardized value lies more than 3 standard
# deviations from the mean in any continuous column. A row is listed once
# per offending column, so duplicates are expected here.
out = []
for feature in con:
    beyond_3_sigma = X1[feature].abs() > 3
    out.extend(X1.index[beyond_3_sigma])
print(out)
[9, 48, 93, 125, 165, 246, 291, 300, 312, 335, 411, 488, 520, 535, 635, 637, 703, 705, 713, 861, 969, 985, 1030, 1062, 1144, 1186, 1190, 1266, 1393, 1416, 171, 197, 231, 277, 313, 446, 807, 909, 934, 1107, 1127, 1173, 1182, 1211, 1298, 1337, 53, 249, 313, 335, 384, 451, 457, 661, 706, 769, 848, 1298, 1396, 375, 533, 88, 185, 191, 218, 241, 250, 304, 375, 378, 398, 461, 508, 519, 583, 676, 703, 726, 745, 980, 991, 1123, 1149, 1213, 1268, 1327, 1352, 1435, 1457, 304, 630, 747, 1132, 1137, 1349, 37, 58, 70, 105, 115, 161, 169, 178, 224, 297, 349, 403, 477, 517, 523, 654, 691, 718, 755, 763, 798, 808, 825, 898, 981, 1111, 1169, 1228, 1289, 1298, 1373, 1417, 70, 178, 523, 898, 1182, 1298, 24, 52, 113, 116, 153, 166, 233, 253, 260, 263, 271, 273, 313, 322, 355, 414, 440, 446, 470, 493, 542, 548, 577, 586, 599, 666, 697, 764, 785, 828, 842, 854, 888, 918, 923, 924, 1040, 1059, 1077, 1152, 1220, 1253, 1299, 1308, 1320, 1369, 1387, 1418, 1445, 1458, 137, 224, 278, 477, 496, 581, 678, 774, 798, 932, 1267, 224, 332, 440, 496, 523, 691, 1044, 1182, 1298, 1373, 224, 440, 496, 523, 529, 691, 898, 1024, 1044, 1182, 1298, 1373, 304, 691, 1169, 1182, 118, 185, 197, 304, 496, 523, 608, 635, 691, 769, 798, 1169, 1182, 1268, 1298, 1353, 53, 188, 313, 326, 335, 420, 454, 588, 634, 738, 807, 921, 942, 1163, 1270, 1298, 1, 26, 33, 37, 41, 50, 93, 116, 129, 176, 197, 201, 213, 215, 218, 245, 249, 251, 253, 298, 299, 314, 330, 352, 358, 367, 414, 421, 426, 499, 504, 558, 574, 576, 580, 597, 611, 628, 633, 658, 691, 697, 717, 741, 743, 745, 814, 828, 892, 920, 925, 931, 944, 952, 953, 954, 1006, 1029, 1041, 1047, 1052, 1055, 1069, 1072, 1076, 1080, 1103, 1118, 1123, 1149, 1156, 1181, 1213, 1225, 1276, 1287, 1327, 1335, 1350, 1389, 1405, 1415, 188, 298, 597, 624, 628, 921, 1154, 1163, 1230, 1283, 1350, 1450, 53, 144, 189, 291, 330, 570, 634, 635, 843, 897, 1163, 1213, 1270, 1350, 8, 9, 17, 39, 48, 74, 78, 93, 102, 137, 144, 165, 188, 246, 330, 342, 420, 441, 454, 488, 505, 520, 529, 570, 
634, 635, 637, 676, 703, 705, 728, 736, 778, 809, 843, 886, 894, 897, 910, 913, 921, 940, 942, 943, 954, 955, 984, 1003, 1011, 1030, 1062, 1090, 1163, 1186, 1216, 1230, 1232, 1266, 1275, 1283, 1292, 1336, 1350, 1391, 1393, 1412, 1416, 1450, 185, 635, 769, 803, 897, 910, 1031, 1173, 1230, 1298, 1350, 1386, 166, 309, 605, 642, 1298, 93, 653, 178, 581, 664, 825, 1061, 1190, 1298, 53, 64, 166, 169, 335, 343, 357, 480, 661, 769, 828, 848, 893, 961, 974, 1044, 1068, 1210, 1312, 1313, 1423, 1459, 28, 185, 293, 495, 499, 523, 583, 591, 645, 664, 666, 713, 735, 775, 784, 807, 854, 875, 947, 961, 996, 1184, 1193, 1292, 1298, 1328, 1369, 3, 7, 154, 197, 260, 306, 314, 325, 328, 358, 365, 380, 459, 462, 496, 520, 577, 630, 648, 653, 660, 662, 718, 720, 747, 799, 813, 836, 840, 918, 939, 945, 1013, 1030, 1081, 1119, 1139, 1150, 1152, 1185, 1197, 1202, 1248, 1266, 1326, 1360, 1382, 1393, 1419, 1439, 1445, 5, 55, 120, 129, 159, 182, 187, 205, 237, 258, 280, 546, 704, 726, 744, 889, 941, 1080, 1156, 1161, 1181, 1346, 1437, 46, 72, 80, 104, 176, 185, 189, 196, 289, 297, 312, 339, 351, 359, 360, 366, 400, 426, 471, 475, 550, 605, 618, 625, 647, 673, 764, 769, 785, 795, 803, 828, 830, 854, 859, 887, 888, 907, 919, 944, 1037, 1055, 1067, 1070, 1154, 1171, 1184, 1228, 1282, 1293, 1301, 1320, 1328, 1386, 1414, 197, 810, 1170, 1182, 1298, 1386, 1423]
import numpy as np
# Collapse the per-column hits into a sorted array of distinct row labels.
outliers=np.unique(out)
outliers
array([ 1, 3, 5, 7, 8, 9, 17, 24, 26, 28, 33,
37, 39, 41, 46, 48, 50, 52, 53, 55, 58, 64,
70, 72, 74, 78, 80, 88, 93, 102, 104, 105, 113,
115, 116, 118, 120, 125, 129, 137, 144, 153, 154, 159,
161, 165, 166, 169, 171, 176, 178, 182, 185, 187, 188,
189, 191, 196, 197, 201, 205, 213, 215, 218, 224, 231,
233, 237, 241, 245, 246, 249, 250, 251, 253, 258, 260,
263, 271, 273, 277, 278, 280, 289, 291, 293, 297, 298,
299, 300, 304, 306, 309, 312, 313, 314, 322, 325, 326,
328, 330, 332, 335, 339, 342, 343, 349, 351, 352, 355,
357, 358, 359, 360, 365, 366, 367, 375, 378, 380, 384,
398, 400, 403, 411, 414, 420, 421, 426, 440, 441, 446,
451, 454, 457, 459, 461, 462, 470, 471, 475, 477, 480,
488, 493, 495, 496, 499, 504, 505, 508, 517, 519, 520,
523, 529, 533, 535, 542, 546, 548, 550, 558, 570, 574,
576, 577, 580, 581, 583, 586, 588, 591, 597, 599, 605,
608, 611, 618, 624, 625, 628, 630, 633, 634, 635, 637,
642, 645, 647, 648, 653, 654, 658, 660, 661, 662, 664,
666, 673, 676, 678, 691, 697, 703, 704, 705, 706, 713,
717, 718, 720, 726, 728, 735, 736, 738, 741, 743, 744,
745, 747, 755, 763, 764, 769, 774, 775, 778, 784, 785,
795, 798, 799, 803, 807, 808, 809, 810, 813, 814, 825,
828, 830, 836, 840, 842, 843, 848, 854, 859, 861, 875,
886, 887, 888, 889, 892, 893, 894, 897, 898, 907, 909,
910, 913, 918, 919, 920, 921, 923, 924, 925, 931, 932,
934, 939, 940, 941, 942, 943, 944, 945, 947, 952, 953,
954, 955, 961, 969, 974, 980, 981, 984, 985, 991, 996,
1003, 1006, 1011, 1013, 1024, 1029, 1030, 1031, 1037, 1040, 1041,
1044, 1047, 1052, 1055, 1059, 1061, 1062, 1067, 1068, 1069, 1070,
1072, 1076, 1077, 1080, 1081, 1090, 1103, 1107, 1111, 1118, 1119,
1123, 1127, 1132, 1137, 1139, 1144, 1149, 1150, 1152, 1154, 1156,
1161, 1163, 1169, 1170, 1171, 1173, 1181, 1182, 1184, 1185, 1186,
1190, 1193, 1197, 1202, 1210, 1211, 1213, 1216, 1220, 1225, 1228,
1230, 1232, 1248, 1253, 1266, 1267, 1268, 1270, 1275, 1276, 1282,
1283, 1287, 1289, 1292, 1293, 1298, 1299, 1301, 1308, 1312, 1313,
1320, 1326, 1327, 1328, 1335, 1336, 1337, 1346, 1349, 1350, 1352,
1353, 1360, 1369, 1373, 1382, 1386, 1387, 1389, 1391, 1393, 1396,
1405, 1412, 1414, 1415, 1416, 1417, 1418, 1419, 1423, 1435, 1437,
1439, 1445, 1450, 1457, 1458, 1459])
# Remove the outlier rows from the scaled features, the dummy features and
# the target so the three stay aligned row for row.
# Reassignment is used instead of inplace=True: Y was taken directly from
# df1, so it is safer not to mutate it in place. `axis` is omitted because
# `index=` already names the axis.
X1 = X1.drop(index=outliers)
X2 = X2.drop(index=outliers)
Y = Y.drop(index=outliers)
X1.shape
(1036, 34)
# Dummy-feature frame after outlier removal; row count should match X1.
X2.shape
(1036, 252)
# Target after outlier removal; length should match X1 and X2.
Y.shape
(1036,)
# Combine scaled continuous columns and dummy columns side by side; join
# aligns the two frames on their row index.
Xnew=X1.join(X2)
Xnew.shape
(1036, 286)
# Renumber the remaining rows 0..n-1 in both the features and the target.
# reset_index(drop=True) derives the length from the data, so this keeps
# working if the number of removed outlier rows changes (a hard-coded
# range only fits one specific row count).
Xnew = Xnew.reset_index(drop=True)
Y = Y.reset_index(drop=True)
Xnew.head()
| MSSubClass | LotFrontage | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | BsmtFinSF2 | ... | SaleType_ConLw | SaleType_New | SaleType_Oth | SaleType_WD | SaleCondition_Abnorml | SaleCondition_AdjLand | SaleCondition_Alloca | SaleCondition_Family | SaleCondition_Normal | SaleCondition_Partial | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.073375 | -0.229372 | -0.207142 | 0.651479 | -0.5172 | 1.050994 | 0.878668 | 0.511418 | 0.575425 | -0.288653 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1 | 0.073375 | -0.093110 | 0.073480 | 0.651479 | -0.5172 | 0.984752 | 0.830215 | 0.323060 | 0.092907 | -0.288653 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 2 | 0.073375 | 0.633618 | 0.375148 | 1.374795 | -0.5172 | 0.951632 | 0.733308 | 1.364570 | 0.463568 | -0.288653 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 3 | -0.872563 | 0.224833 | -0.043379 | 1.374795 | -0.5172 | 1.084115 | 0.975575 | 0.456019 | 2.029558 | -0.288653 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4 | -0.872563 | -0.002269 | 0.068469 | -0.795151 | -0.5172 | -0.207594 | -0.962566 | -0.574410 | 1.014077 | -0.288653 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
5 rows × 286 columns
# Column names of the combined feature frame.
Xnew.columns
Index(['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond',
'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
...
'SaleType_ConLw', 'SaleType_New', 'SaleType_Oth', 'SaleType_WD',
'SaleCondition_Abnorml', 'SaleCondition_AdjLand',
'SaleCondition_Alloca', 'SaleCondition_Family', 'SaleCondition_Normal',
'SaleCondition_Partial'],
dtype='object', length=286)
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing.
# NOTE(review): this split uses random_state=45 while the later re-splits in
# the elimination steps use random_state=54, so the first model is fitted on
# different rows than the following ones - confirm which seed is intended.
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=45)
xtrain.shape
(828, 286)
# Column names of the training features.
xtrain.columns
Index(['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond',
'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
...
'SaleType_ConLw', 'SaleType_New', 'SaleType_Oth', 'SaleType_WD',
'SaleCondition_Abnorml', 'SaleCondition_AdjLand',
'SaleCondition_Alloca', 'SaleCondition_Family', 'SaleCondition_Normal',
'SaleCondition_Partial'],
dtype='object', length=286)
# Length of the training target; should match the row count of xtrain.
ytrain.shape
(828,)
from statsmodels.api import OLS,add_constant
# Fit an ordinary least squares model on the training split. add_constant is
# applied first because statsmodels' OLS does not add an intercept by itself.
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
# Adjusted R-squared of the fit, rounded to 4 decimals; the trailing
# expression shows it as the cell output.
rsq=round(model.rsquared_adj,4)
rsq
0.9279
# List the p-value of every model term in ascending order; terms whose
# p-value is NaN end up at the bottom of the listing.
model.pvalues.sort_values()
Neighborhood_StoneBr 9.279455e-12
BsmtExposure_Gd 5.010083e-09
OverallQual 7.418255e-09
BsmtFinSF1 1.349800e-08
TotalBsmtSF 2.484970e-07
...
GarageType_2Types NaN
GarageQual_Po NaN
PoolQC_Ex NaN
PoolQC_Fa NaN
MiscFeature_TenC NaN
Length: 286, dtype: float64
# First backward-elimination step: take the term ranked last by p-value in the
# model fitted above (NaN sorts last), remove it from Xnew, then refit OLS on
# an 80/20 re-split of the reduced feature matrix.
# The intercept is excluded from the ranking: 'const' is not a column of Xnew,
# so nominating it would make the drop below raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the candidate for the next step, again never the intercept.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332 column to drop SaleType_CWD
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332 column to drop PoolQC_Fa
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332 column to drop PoolQC_Ex
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332 column to drop GarageQual_Po
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332 column to drop GarageType_2Types
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332 column to drop Functional_Sev
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332 column to drop Electrical_Mix
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332 column to drop HeatingQC_Po
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332 column to drop Heating_OthW
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332 column to drop KitchenQual_Fa
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332 column to drop HouseStyle_1.5Fin
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332 column to drop MiscFeature_Gar2
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332 column to drop MiscFeature_Shed
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333 column to drop BsmtFinType1_BLQ
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333 column to drop ExterCond_Ex
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333 column to drop BsmtExposure_Av
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333 column to drop Exterior1st_AsphShn
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333 column to drop LandSlope_Gtl
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333 column to drop RoofStyle_Flat
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333 column to drop RoofMatl_Tar&Grv
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333 column to drop RoofMatl_Metal
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333 column to drop Exterior1st_CemntBd
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333 column to drop Exterior2nd_CmentBd
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333 column to drop Exterior2nd_MetalSd
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333 column to drop Exterior2nd_Stone
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9334 column to drop Exterior2nd_Wd Shng
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9335 column to drop Utilities_NoSeWa
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9335 column to drop Electrical_FuseF
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9335 column to drop Condition1_RRAn
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9335 column to drop Condition2_RRAe
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9335 column to drop RoofStyle_Shed
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9335 column to drop Exterior2nd_Other
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9335 column to drop Heating_Floor
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9335 column to drop Heating_GasW
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9336 column to drop Condition2_PosN
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9336 column to drop Condition2_PosA
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9336 column to drop Foundation_Wood
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9336 column to drop Foundation_CBlock
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9337 column to drop YrSold
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9338 column to drop ExterCond_Po
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9338 column to drop Exterior2nd_HdBoard
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9339 column to drop Condition2_Artery
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9339 column to drop Exterior1st_Stone
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9339 column to drop Exterior2nd_Plywood
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934 column to drop BldgType_2fmCon
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934 column to drop MasVnrType_BrkFace
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934 column to drop Street_Grvl
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934 column to drop SaleType_COD
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934 column to drop RoofMatl_WdShngl
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934 column to drop RoofMatl_ClyTile
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934 column to drop RoofMatl_Roll
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934 column to drop GarageCond_TA
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934 column to drop MSZoning_RH
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934 column to drop Condition2_RRAn
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934 column to drop GarageCond_Gd
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9341 column to drop ExterCond_Fa
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9341 column to drop RoofStyle_Gambrel
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9341 column to drop HouseStyle_2Story
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9342 column to drop TotRmsAbvGrd
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9343 column to drop Foundation_PConc
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9344 column to drop KitchenQual_TA
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9345 column to drop Exterior2nd_BrkFace
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9346 column to drop Exterior2nd_Brk Cmn
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9347 column to drop RoofMatl_Membran
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9347 column to drop Functional_Maj1
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9347 column to drop Condition2_Feedr
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9347 column to drop Condition2_RRNn
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9348 column to drop SaleType_ConLI
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9349 column to drop BsmtFinType2_Rec
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9349 column to drop BsmtFinType2_BLQ
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.935 column to drop BsmtCond_Po
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.935 column to drop Condition1_RRNe
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9351 column to drop Electrical_FuseP
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9352 column to drop EnclosedPorch
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9353 column to drop Exterior1st_CBlock
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9353 column to drop Exterior2nd_CBlock
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9354 column to drop Exterior2nd_AsphShn
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9354 column to drop MasVnrType_BrkCmn
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9355 column to drop Neighborhood_SawyerW
# Backward-elimination step: remove the column nominated by the previous fit,
# then refit OLS on an 80/20 re-split of the reduced feature matrix.
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
# Nominate the term ranked last by p-value (NaN sorts last) for the next step.
# The intercept is excluded: 'const' is not a column of Xnew, so nominating it
# would make the next drop raise KeyError.
col_to_drop=model.pvalues.drop(labels='const',errors='ignore').sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9355 column to drop Neighborhood_Somerst
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9356 column to drop Neighborhood_Blueste
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9357 column to drop KitchenQual_Gd
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9358 column to drop BsmtFinType1_Rec
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9359 column to drop Exterior1st_BrkFace
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.936 column to drop ScreenPorch
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9361 column to drop BsmtQual_TA
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9361 column to drop GarageType_Detchd
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9361 column to drop GarageType_BuiltIn
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9362 column to drop GarageType_Basment
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9363 column to drop BldgType_Twnhs
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9364 column to drop BsmtCond_Fa
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9364 column to drop Street_Pave
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9364 column to drop MasVnrType_None
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9365 column to drop BsmtFullBath
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9366 column to drop BsmtUnfSF
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9366 column to drop RoofStyle_Hip
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9367 column to drop ExterCond_Gd
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9368 column to drop Neighborhood_Veenker
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9369 column to drop Condition1_Feedr
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.937 column to drop Exterior2nd_AsbShng
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.937 column to drop Neighborhood_BrkSide
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9371 column to drop Neighborhood_MeadowV
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9372 column to drop SaleType_ConLD
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9373 column to drop GarageType_CarPort
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9374 column to drop MoSold
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9375 column to drop MSZoning_RL
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9375 column to drop Neighborhood_ClearCr
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9376 column to drop Electrical_SBrkr
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9377 column to drop Condition1_RRNn
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9378 column to drop LotConfig_FR3
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9378 column to drop LotConfig_FR2
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9379 column to drop Condition2_Norm
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9379 column to drop Fence_MnWw
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9379 column to drop SaleType_ConLw
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.938 column to drop SaleType_Con
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9381 column to drop Functional_Maj2
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9382 column to drop Functional_Min1
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9382 column to drop PavedDrive_P
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9382 column to drop Functional_Min2
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9383 column to drop LandSlope_Mod
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9384 column to drop LotShape_IR3
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9384 column to drop RoofMatl_WdShake
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9385 column to drop SaleType_WD
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9385 column to drop GarageYrBlt
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9386 column to drop GarageCond_Ex
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9386 column to drop GarageQual_Ex
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9386 column to drop GarageQual_TA
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9387 column to drop Neighborhood_SWISU
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9387 column to drop BsmtFinType2_LwQ
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9388 column to drop Neighborhood_IDOTRR
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9388 column to drop Heating_Grav
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9389 column to drop Heating_GasA
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.939 column to drop Exterior1st_MetalSd
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.939 column to drop BsmtFinType1_Unf
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9391 column to drop BsmtFinType1_ALQ
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9391 column to drop GarageCond_Fa
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9392 column to drop RoofStyle_Gable
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9393 column to drop MiscFeature_Othr
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9393 column to drop SaleCondition_Normal
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9393 column to drop Fence_GdPrv
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9393 column to drop FireplaceQu_Fa
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9393 column to drop LandSlope_Sev
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9394 column to drop BsmtFinType2_ALQ
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9394 column to drop Condition1_PosN
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9395 column to drop Exterior1st_BrkComm
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9395 column to drop ExterQual_TA
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9395 column to drop ExterQual_Gd
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9396 column to drop SaleType_Oth
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9396 column to drop Exterior1st_AsbShng
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9396 column to drop Neighborhood_Sawyer
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9397 column to drop Neighborhood_Gilbert
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9397 column to drop Neighborhood_CollgCr
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9398 column to drop Neighborhood_Timber
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9398 column to drop GarageCars
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9398 column to drop GarageQual_Gd
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9399 column to drop Neighborhood_NAmes
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9399 column to drop BsmtQual_Gd
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9399 column to drop RoofStyle_Mansard
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9399 column to drop Neighborhood_Mitchel
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.94 column to drop Condition1_Norm
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.94 column to drop GrLivArea
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.94 column to drop HouseStyle_2.5Fin
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9401 column to drop HouseStyle_1Story
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9401 column to drop GarageCond_Po
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9401 column to drop FireplaceQu_Ex
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9401 column to drop FireplaceQu_TA
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9402 column to drop FireplaceQu_Gd
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9402 column to drop BldgType_1Fam
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9402 column to drop OpenPorchSF
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9402 column to drop LotFrontage
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9402 column to drop LotShape_IR1
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9402 column to drop LotShape_Reg
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9402 column to drop HouseStyle_2.5Unf
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9402 column to drop SaleCondition_Family
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9402 column to drop Fence_GdWo
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9402 column to drop SaleType_New
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9402 column to drop BsmtFinSF2
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9402 column to drop BsmtFinType2_Unf
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9402 column to drop ExterQual_Fa
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9402 column to drop Exterior1st_ImStucc
# Backward-elimination step: discard the feature chosen by the previous fit,
# refit OLS on the training split, then nominate the next feature to discard.
Xnew = Xnew.drop(columns=col_to_drop)
# Same random_state every step, so the train/test rows never change.
xtrain, xtest, ytrain, ytest = train_test_split(Xnew, Y, test_size=0.2, random_state=54)
Xconst = add_constant(xtrain)
ols = OLS(ytrain, Xconst)
model = ols.fit()
rsq = round(model.rsquared_adj, 4)
# Rank only real features: the intercept ('const') is not a column of Xnew,
# so nominating it would make the next step's drop raise KeyError.
feature_pvalues = model.pvalues.drop(labels='const', errors='ignore')
col_to_drop = feature_pvalues.sort_values().index[-1]
print('Adjusted r_squared', rsq)
print('column to drop', col_to_drop)
Adjusted r_squared 0.9401 column to drop LotShape_IR2
# Display the feature columns still present in Xnew after the elimination steps.
Xnew.columns
Index(['MSSubClass', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt',
'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'TotalBsmtSF', '1stFlrSF',
...
'GarageFinish_Unf', 'GarageQual_Fa', 'PavedDrive_N', 'PavedDrive_Y',
'PoolQC_Gd', 'Fence_MnPrv', 'SaleCondition_Abnorml',
'SaleCondition_AdjLand', 'SaleCondition_Alloca',
'SaleCondition_Partial'],
dtype='object', length=105)
# Number of feature columns currently in Xnew.
len(Xnew.columns)
105
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Baseline: ordinary least squares fitted on the current training split.
lm = LinearRegression()
model = lm.fit(xtrain, ytrain)
tr_pred, ts_pred = model.predict(xtrain), model.predict(xtest)

# Mean squared error and mean absolute error, for the training rows and the
# held-out rows respectively.
tr_err, tr_ab = mean_squared_error(ytrain, tr_pred), mean_absolute_error(ytrain, tr_pred)
ts_err, ts_ab = mean_squared_error(ytest, ts_pred), mean_absolute_error(ytest, ts_pred)
print('train_err', tr_err)
train_err 245572615.88164252
# Mean squared error on the held-out split.
print('test_err',ts_err)
test_err 466737027.84134614
# Mean absolute error on the training split.
print('train_ab',tr_ab)
train_ab 10879.28502415459
# Mean absolute error on the held-out split.
print('test_ab',ts_ab)
test_ab 14505.70673076923
from sklearn.linear_model import Ridge

# Ridge regression with a small fixed penalty, scored with MSE and MAE on
# both the training split and the held-out split.
rr = Ridge(alpha=0.2)
model = rr.fit(xtrain, ytrain)
tr_pred, ts_pred = model.predict(xtrain), model.predict(xtest)
tr_err, tr_ab = mean_squared_error(ytrain, tr_pred), mean_absolute_error(ytrain, tr_pred)
ts_err, ts_ab = mean_squared_error(ytest, ts_pred), mean_absolute_error(ytest, ts_pred)
for label, value in (
    ('train_err', tr_err),
    ('test_err', ts_err),
    ('train_ab', tr_ab),
    ('test_ab', ts_ab),
):
    print(label, value)
train_err 245918989.7018548 test_err 446292659.0930184 train_ab 10879.63210124505 test_ab 14234.733770509793
# Candidate ridge penalties: 0.01, 0.02, ..., 5.00 (500 values).
# Dividing an integer step by 100 yields the same floats as repeatedly adding
# 0.01 and rounding to two decimals, without carrying a running accumulator.
w = [step / 100 for step in range(1, 501)]
w
[0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5, 0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.0, 1.01, 1.02, 1.03, 1.04, 1.05, 1.06, 1.07, 1.08, 1.09, 1.1, 1.11, 1.12, 1.13, 1.14, 1.15, 1.16, 1.17, 1.18, 1.19, 1.2, 1.21, 1.22, 1.23, 1.24, 1.25, 1.26, 1.27, 1.28, 1.29, 1.3, 1.31, 1.32, 1.33, 1.34, 1.35, 1.36, 1.37, 1.38, 1.39, 1.4, 1.41, 1.42, 1.43, 1.44, 1.45, 1.46, 1.47, 1.48, 1.49, 1.5, 1.51, 1.52, 1.53, 1.54, 1.55, 1.56, 1.57, 1.58, 1.59, 1.6, 1.61, 1.62, 1.63, 1.64, 1.65, 1.66, 1.67, 1.68, 1.69, 1.7, 1.71, 1.72, 1.73, 1.74, 1.75, 1.76, 1.77, 1.78, 1.79, 1.8, 1.81, 1.82, 1.83, 1.84, 1.85, 1.86, 1.87, 1.88, 1.89, 1.9, 1.91, 1.92, 1.93, 1.94, 1.95, 1.96, 1.97, 1.98, 1.99, 2.0, 2.01, 2.02, 2.03, 2.04, 2.05, 2.06, 2.07, 2.08, 2.09, 2.1, 2.11, 2.12, 2.13, 2.14, 2.15, 2.16, 2.17, 2.18, 2.19, 2.2, 2.21, 2.22, 2.23, 2.24, 2.25, 2.26, 2.27, 2.28, 2.29, 2.3, 2.31, 2.32, 2.33, 2.34, 2.35, 2.36, 2.37, 2.38, 2.39, 2.4, 2.41, 2.42, 2.43, 2.44, 2.45, 2.46, 2.47, 2.48, 2.49, 2.5, 2.51, 2.52, 2.53, 2.54, 2.55, 2.56, 2.57, 2.58, 2.59, 2.6, 2.61, 2.62, 2.63, 2.64, 2.65, 2.66, 2.67, 2.68, 2.69, 2.7, 2.71, 2.72, 2.73, 2.74, 2.75, 2.76, 2.77, 2.78, 2.79, 2.8, 2.81, 2.82, 2.83, 2.84, 2.85, 2.86, 2.87, 2.88, 2.89, 2.9, 2.91, 2.92, 2.93, 2.94, 2.95, 2.96, 2.97, 2.98, 2.99, 3.0, 3.01, 3.02, 3.03, 3.04, 3.05, 3.06, 3.07, 3.08, 3.09, 3.1, 3.11, 3.12, 3.13, 3.14, 3.15, 3.16, 3.17, 3.18, 3.19, 3.2, 3.21, 3.22, 3.23, 3.24, 3.25, 3.26, 3.27, 3.28, 3.29, 3.3, 3.31, 3.32, 3.33, 3.34, 3.35, 3.36, 3.37, 3.38, 
3.39, 3.4, 3.41, 3.42, 3.43, 3.44, 3.45, 3.46, 3.47, 3.48, 3.49, 3.5, 3.51, 3.52, 3.53, 3.54, 3.55, 3.56, 3.57, 3.58, 3.59, 3.6, 3.61, 3.62, 3.63, 3.64, 3.65, 3.66, 3.67, 3.68, 3.69, 3.7, 3.71, 3.72, 3.73, 3.74, 3.75, 3.76, 3.77, 3.78, 3.79, 3.8, 3.81, 3.82, 3.83, 3.84, 3.85, 3.86, 3.87, 3.88, 3.89, 3.9, 3.91, 3.92, 3.93, 3.94, 3.95, 3.96, 3.97, 3.98, 3.99, 4.0, 4.01, 4.02, 4.03, 4.04, 4.05, 4.06, 4.07, 4.08, 4.09, 4.1, 4.11, 4.12, 4.13, 4.14, 4.15, 4.16, 4.17, 4.18, 4.19, 4.2, 4.21, 4.22, 4.23, 4.24, 4.25, 4.26, 4.27, 4.28, 4.29, 4.3, 4.31, 4.32, 4.33, 4.34, 4.35, 4.36, 4.37, 4.38, 4.39, 4.4, 4.41, 4.42, 4.43, 4.44, 4.45, 4.46, 4.47, 4.48, 4.49, 4.5, 4.51, 4.52, 4.53, 4.54, 4.55, 4.56, 4.57, 4.58, 4.59, 4.6, 4.61, 4.62, 4.63, 4.64, 4.65, 4.66, 4.67, 4.68, 4.69, 4.7, 4.71, 4.72, 4.73, 4.74, 4.75, 4.76, 4.77, 4.78, 4.79, 4.8, 4.81, 4.82, 4.83, 4.84, 4.85, 4.86, 4.87, 4.88, 4.89, 4.9, 4.91, 4.92, 4.93, 4.94, 4.95, 4.96, 4.97, 4.98, 4.99, 5.0]
from sklearn.model_selection import GridSearchCV

# Tune the ridge penalty over the candidate grid `w` with 4-fold
# cross-validation, scored on (negated) mean absolute error.
rr = Ridge()
tg = {'alpha': w}
cv = GridSearchCV(rr, tg, scoring='neg_mean_absolute_error', cv=4)
# Search on the training split only: xtest is used afterwards to score the
# tuned model, so letting those rows influence the choice of alpha would make
# that score optimistically biased.
cvmodel = cv.fit(xtrain, ytrain)
cvmodel.best_params_
{'alpha': 2.36}
# Refit ridge with the penalty selected by the grid search and score it with
# MSE and MAE on both splits. Reading alpha from the search result keeps this
# cell in step with the search if the grid or the data ever changes.
rr = Ridge(alpha=cvmodel.best_params_['alpha'])
model = rr.fit(xtrain, ytrain)
tr_pred = model.predict(xtrain)
ts_pred = model.predict(xtest)
tr_err = mean_squared_error(ytrain, tr_pred)
ts_err = mean_squared_error(ytest, ts_pred)
tr_ab = mean_absolute_error(ytrain, tr_pred)
ts_ab = mean_absolute_error(ytest, ts_pred)
print('train_err', tr_err)
print('test_err', ts_err)
print('train_ab', tr_ab)
print('test_ab', ts_ab)
train_err 257484090.64511794 test_err 380859377.19354373 train_ab 11105.32108271728 test_ab 13265.776840227627
# Display the estimator that the grid search refitted with the best parameters.
cvmodel.best_estimator_
Ridge(alpha=2.36)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Ridge(alpha=2.36)
# Load the test set from a fixed local path and display it.
df2=pd.read_csv('D:/testing_set.csv')
df2
| Id | MSSubClass | MSZoning | LotFrontage | LotArea | Street | Alley | LotShape | LandContour | Utilities | ... | ScreenPorch | PoolArea | PoolQC | Fence | MiscFeature | MiscVal | MoSold | YrSold | SaleType | SaleCondition | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1461 | 20 | RH | 80.0 | 11622 | Pave | NaN | Reg | Lvl | AllPub | ... | 120 | 0 | NaN | MnPrv | NaN | 0 | 6 | 2010 | WD | Normal |
| 1 | 1462 | 20 | RL | 81.0 | 14267 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | 0 | NaN | NaN | Gar2 | 12500 | 6 | 2010 | WD | Normal |
| 2 | 1463 | 60 | RL | 74.0 | 13830 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | 0 | NaN | MnPrv | NaN | 0 | 3 | 2010 | WD | Normal |
| 3 | 1464 | 60 | RL | 78.0 | 9978 | Pave | NaN | IR1 | Lvl | AllPub | ... | 0 | 0 | NaN | NaN | NaN | 0 | 6 | 2010 | WD | Normal |
| 4 | 1465 | 120 | RL | 43.0 | 5005 | Pave | NaN | IR1 | HLS | AllPub | ... | 144 | 0 | NaN | NaN | NaN | 0 | 1 | 2010 | WD | Normal |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1454 | 2915 | 160 | RM | 21.0 | 1936 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | 0 | NaN | NaN | NaN | 0 | 6 | 2006 | WD | Normal |
| 1455 | 2916 | 160 | RM | 21.0 | 1894 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | 0 | NaN | NaN | NaN | 0 | 4 | 2006 | WD | Abnorml |
| 1456 | 2917 | 20 | RL | 160.0 | 20000 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | 0 | NaN | NaN | NaN | 0 | 9 | 2006 | WD | Abnorml |
| 1457 | 2918 | 85 | RL | 62.0 | 10441 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | 0 | NaN | MnPrv | Shed | 700 | 7 | 2006 | WD | Normal |
| 1458 | 2919 | 60 | RL | 74.0 | 9627 | Pave | NaN | Reg | Lvl | AllPub | ... | 0 | 0 | NaN | NaN | NaN | 0 | 11 | 2006 | WD | Normal |
1459 rows × 80 columns
# Print the dtype and non-null count of every test-set column.
df2.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1459 entries, 0 to 1458 Data columns (total 80 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Id 1459 non-null int64 1 MSSubClass 1459 non-null int64 2 MSZoning 1455 non-null object 3 LotFrontage 1232 non-null float64 4 LotArea 1459 non-null int64 5 Street 1459 non-null object 6 Alley 107 non-null object 7 LotShape 1459 non-null object 8 LandContour 1459 non-null object 9 Utilities 1457 non-null object 10 LotConfig 1459 non-null object 11 LandSlope 1459 non-null object 12 Neighborhood 1459 non-null object 13 Condition1 1459 non-null object 14 Condition2 1459 non-null object 15 BldgType 1459 non-null object 16 HouseStyle 1459 non-null object 17 OverallQual 1459 non-null int64 18 OverallCond 1459 non-null int64 19 YearBuilt 1459 non-null int64 20 YearRemodAdd 1459 non-null int64 21 RoofStyle 1459 non-null object 22 RoofMatl 1459 non-null object 23 Exterior1st 1458 non-null object 24 Exterior2nd 1458 non-null object 25 MasVnrType 1443 non-null object 26 MasVnrArea 1444 non-null float64 27 ExterQual 1459 non-null object 28 ExterCond 1459 non-null object 29 Foundation 1459 non-null object 30 BsmtQual 1415 non-null object 31 BsmtCond 1414 non-null object 32 BsmtExposure 1415 non-null object 33 BsmtFinType1 1417 non-null object 34 BsmtFinSF1 1458 non-null float64 35 BsmtFinType2 1417 non-null object 36 BsmtFinSF2 1458 non-null float64 37 BsmtUnfSF 1458 non-null float64 38 TotalBsmtSF 1458 non-null float64 39 Heating 1459 non-null object 40 HeatingQC 1459 non-null object 41 CentralAir 1459 non-null object 42 Electrical 1459 non-null object 43 1stFlrSF 1459 non-null int64 44 2ndFlrSF 1459 non-null int64 45 LowQualFinSF 1459 non-null int64 46 GrLivArea 1459 non-null int64 47 BsmtFullBath 1457 non-null float64 48 BsmtHalfBath 1457 non-null float64 49 FullBath 1459 non-null int64 50 HalfBath 1459 non-null int64 51 BedroomAbvGr 1459 non-null int64 52 KitchenAbvGr 1459 non-null int64 53 KitchenQual 1458 
non-null object 54 TotRmsAbvGrd 1459 non-null int64 55 Functional 1457 non-null object 56 Fireplaces 1459 non-null int64 57 FireplaceQu 729 non-null object 58 GarageType 1383 non-null object 59 GarageYrBlt 1381 non-null float64 60 GarageFinish 1381 non-null object 61 GarageCars 1458 non-null float64 62 GarageArea 1458 non-null float64 63 GarageQual 1381 non-null object 64 GarageCond 1381 non-null object 65 PavedDrive 1459 non-null object 66 WoodDeckSF 1459 non-null int64 67 OpenPorchSF 1459 non-null int64 68 EnclosedPorch 1459 non-null int64 69 3SsnPorch 1459 non-null int64 70 ScreenPorch 1459 non-null int64 71 PoolArea 1459 non-null int64 72 PoolQC 3 non-null object 73 Fence 290 non-null object 74 MiscFeature 51 non-null object 75 MiscVal 1459 non-null int64 76 MoSold 1459 non-null int64 77 YrSold 1459 non-null int64 78 SaleType 1458 non-null object 79 SaleCondition 1459 non-null object dtypes: float64(11), int64(26), object(43) memory usage: 912.0+ KB
# (rows, columns) of the test set.
df2.shape
(1459, 80)
# Count missing values per test-set column before imputation.
df2.isna().sum()
Id 0
MSSubClass 0
MSZoning 4
LotFrontage 227
LotArea 0
...
MiscVal 0
MoSold 0
YrSold 0
SaleType 1
SaleCondition 0
Length: 80, dtype: int64
# Fill every test-set column that has missing values: object (categorical)
# columns get their most frequent value, all other columns get their mean.
# NOTE(review): the fill values are computed from df2 itself; if the model was
# trained on data imputed with training-set statistics, confirm that filling
# the test set from its own statistics is intended.
for i in df2.columns:
    if df2[i].isna().sum() > 0:
        if df2[i].dtypes == 'object':
            x = df2[i].mode()[0]
        else:
            x = df2[i].mean()
        df2[i] = df2[i].fillna(x)
# Re-check the per-column missing counts after filling.
df2.isna().sum()
Id 0
MSSubClass 0
MSZoning 0
LotFrontage 0
LotArea 0
..
MiscVal 0
MoSold 0
YrSold 0
SaleType 0
SaleCondition 0
Length: 80, dtype: int64
# Build the test feature frame by removing three columns from df2, then show
# its shape.
Xts = df2.drop(columns=['Id', 'LowQualFinSF', 'MiscVal'])
Xts.shape
(1459, 77)
# Partition the test feature columns by dtype: object columns are treated as
# categorical, everything else as continuous. Column order is preserved.
cat_ts = [col for col in Xts.columns if Xts[col].dtypes == 'object']
con_ts = [col for col in Xts.columns if Xts[col].dtypes != 'object']
print(cat_ts)
print(con_ts)
['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition'] ['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MoSold', 'YrSold']
# Explicit categorical / continuous column lists for the test set.
# NOTE(review): these literals match the lists printed by the dtype-based split
# just above, so this reassignment looks redundant; confirm before removing.
cat_ts=['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood',
'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd',
'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1',
'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu',
'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType',
'SaleCondition']
con_ts=['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea',
'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'BsmtFullBath',
'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt',
'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea',
'MoSold', 'YrSold']
from sklearn.preprocessing import StandardScaler
# Standardise the continuous test-set columns with a StandardScaler.
# NOTE(review): the scaler is re-created and fitted on the test data here. If the
# model was trained on features scaled with training-set statistics, the already
# fitted training scaler's transform() should be reused instead -- confirm
# against the training cells, which are outside this section.
ss = StandardScaler()
# fit_transform returns a bare array, so the frame would otherwise get a fresh
# 0..n-1 index. Carry over Xts's own index so the index-based join with the
# one-hot columns (which keep Xts's index) lines up row for row.
X1ts = pd.DataFrame(
    ss.fit_transform(Xts[con_ts]),
    columns=con_ts,
    index=Xts.index,
)
X1ts
| MSSubClass | LotFrontage | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | BsmtFinSF2 | ... | GarageCars | GarageArea | WoodDeckSF | OpenPorchSF | EnclosedPorch | 3SsnPorch | ScreenPorch | PoolArea | MoSold | YrSold | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -0.874711 | 0.555587 | 0.363929 | -0.751101 | 0.400766 | -0.340945 | -1.072885 | -0.570108 | 0.063295 | 0.517348 | ... | -0.988013 | 1.185945 | 0.366678 | -0.701628 | -0.360738 | -0.088827 | 1.818960 | -0.057227 | -0.038281 | 1.713905 |
| 1 | -0.874711 | 0.604239 | 0.897861 | -0.054877 | 0.400766 | -0.439695 | -1.214908 | 0.041273 | 1.063392 | -0.297903 | ... | -0.988013 | -0.741213 | 2.347867 | -0.178826 | -0.360738 | -0.088827 | -0.301543 | -0.057227 | -0.038281 | 1.713905 |
| 2 | 0.061351 | 0.263676 | 0.809646 | -0.751101 | -0.497418 | 0.844059 | 0.678742 | -0.570108 | 0.773254 | -0.297903 | ... | 0.301623 | 0.042559 | 0.930495 | -0.207871 | -0.360738 | -0.088827 | -0.301543 | -0.057227 | -1.140614 | 1.713905 |
| 3 | 0.061351 | 0.458284 | 0.032064 | -0.054877 | 0.400766 | 0.876976 | 0.678742 | -0.456889 | 0.357829 | -0.297903 | ... | 0.301623 | -0.012766 | 2.089451 | -0.178826 | -0.360738 | -0.088827 | -0.301543 | -0.057227 | -0.038281 | 1.713905 |
| 4 | 1.465443 | -1.244533 | -0.971808 | 1.337571 | -0.497418 | 0.679475 | 0.394694 | -0.570108 | -0.387298 | -0.297903 | ... | 0.301623 | 0.153210 | -0.729632 | 0.489198 | -0.360738 | -0.088827 | 2.243060 | -0.057227 | -1.875504 | 1.713905 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1454 | 2.401505 | -2.314875 | -1.591330 | -1.447325 | 1.298950 | -0.044694 | -0.646813 | -0.570108 | -0.965376 | -0.297903 | ... | -2.277648 | -2.179665 | -0.729632 | -0.701628 | -0.360738 | -0.088827 | -0.301543 | -0.057227 | -0.038281 | -1.359958 |
| 1455 | 2.401505 | -2.314875 | -1.599808 | -1.447325 | -0.497418 | -0.044694 | -0.646813 | -0.570108 | -0.411477 | -0.297903 | ... | -0.988013 | -0.861084 | -0.729632 | -0.353093 | -0.360738 | -0.088827 | -0.301543 | -0.057227 | -0.773170 | -1.359958 |
| 1456 | -0.874711 | 4.447740 | 2.055150 | -0.751101 | 1.298950 | -0.373861 | 0.584059 | -0.570108 | 1.724994 | -0.297903 | ... | 0.301623 | 0.475939 | 2.982161 | -0.701628 | -0.360738 | -0.088827 | -0.301543 | -0.057227 | 1.064053 | -1.359958 |
| 1457 | 0.646389 | -0.320147 | 0.125527 | -0.751101 | -0.497418 | 0.679475 | 0.394694 | -0.570108 | -0.224645 | -0.297903 | ... | -2.277648 | -2.179665 | -0.103169 | -0.236915 | -0.360738 | -0.088827 | -0.301543 | -0.057227 | 0.329164 | -1.359958 |
| 1458 | 0.061351 | 0.263676 | -0.038790 | 0.641347 | -0.497418 | 0.712392 | 0.489377 | -0.037980 | 0.700719 | -0.297903 | ... | 1.591258 | 0.817111 | 0.758218 | -0.004559 | -0.360738 | -0.088827 | -0.301543 | -0.057227 | 1.798942 | -1.359958 |
1459 rows × 34 columns
# One-hot encode the categorical test-set columns; each category level becomes
# its own "<column>_<level>" indicator column.
X2ts = pd.get_dummies(data=Xts[cat_ts])
X2ts
| MSZoning_C (all) | MSZoning_FV | MSZoning_RH | MSZoning_RL | MSZoning_RM | Street_Grvl | Street_Pave | Alley_Grvl | Alley_Pave | LotShape_IR1 | ... | SaleType_ConLw | SaleType_New | SaleType_Oth | SaleType_WD | SaleCondition_Abnorml | SaleCondition_AdjLand | SaleCondition_Alloca | SaleCondition_Family | SaleCondition_Normal | SaleCondition_Partial | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 2 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 3 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 4 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1454 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1455 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
| 1456 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
| 1457 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
| 1458 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
1459 rows × 234 columns
# Attach the one-hot columns to the scaled numeric columns, matching rows by
# index (left join, which is DataFrame.join's default).
Xnew_ts = X1ts.join(X2ts, how='left')
Xnew_ts.shape
(1459, 268)
# Renumber the rows 0..n-1. The length is taken from the frame itself rather
# than hard-coded, so this keeps working if the test set has a different number
# of rows.
Xnew_ts.index = range(len(Xnew_ts))
# Split the test-set feature columns into those that also exist in Xnew
# (`keep`) and those that do not (`drop`). Both lists preserve the column
# order of Xnew_ts.
keep = [col for col in Xnew_ts.columns if col in Xnew.columns]
drop = [col for col in Xnew_ts.columns if col not in Xnew.columns]
print(keep)
['MSSubClass', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'Fireplaces', 'GarageArea', 'WoodDeckSF', '3SsnPorch', 'PoolArea', 'MSZoning_C (all)', 'MSZoning_FV', 'MSZoning_RM', 'Alley_Grvl', 'Alley_Pave', 'LotShape_IR2', 'LandContour_Bnk', 'LandContour_HLS', 'LandContour_Low', 'LandContour_Lvl', 'Utilities_AllPub', 'LotConfig_Corner', 'LotConfig_CulDSac', 'LotConfig_Inside', 'Neighborhood_Blmngtn', 'Neighborhood_BrDale', 'Neighborhood_Crawfor', 'Neighborhood_Edwards', 'Neighborhood_NPkVill', 'Neighborhood_NWAmes', 'Neighborhood_NoRidge', 'Neighborhood_NridgHt', 'Neighborhood_OldTown', 'Neighborhood_StoneBr', 'Condition1_Artery', 'Condition1_PosA', 'Condition1_RRAe', 'BldgType_Duplex', 'BldgType_TwnhsE', 'HouseStyle_1.5Unf', 'HouseStyle_SFoyer', 'HouseStyle_SLvl', 'RoofMatl_CompShg', 'Exterior1st_HdBoard', 'Exterior1st_Plywood', 'Exterior1st_Stucco', 'Exterior1st_VinylSd', 'Exterior1st_Wd Sdng', 'Exterior1st_WdShing', 'Exterior2nd_ImStucc', 'Exterior2nd_Stucco', 'Exterior2nd_VinylSd', 'Exterior2nd_Wd Sdng', 'MasVnrType_Stone', 'ExterQual_Ex', 'ExterCond_TA', 'Foundation_BrkTil', 'Foundation_Slab', 'Foundation_Stone', 'BsmtQual_Ex', 'BsmtQual_Fa', 'BsmtCond_Gd', 'BsmtCond_TA', 'BsmtExposure_Gd', 'BsmtExposure_Mn', 'BsmtExposure_No', 'BsmtFinType1_GLQ', 'BsmtFinType1_LwQ', 'BsmtFinType2_GLQ', 'Heating_Wall', 'HeatingQC_Ex', 'HeatingQC_Fa', 'HeatingQC_Gd', 'HeatingQC_TA', 'CentralAir_N', 'CentralAir_Y', 'Electrical_FuseA', 'KitchenQual_Ex', 'Functional_Mod', 'Functional_Typ', 'FireplaceQu_Po', 'GarageType_Attchd', 'GarageFinish_Fin', 'GarageFinish_RFn', 'GarageFinish_Unf', 'GarageQual_Fa', 'PavedDrive_N', 'PavedDrive_Y', 'PoolQC_Gd', 'Fence_MnPrv', 'SaleCondition_Abnorml', 'SaleCondition_AdjLand', 'SaleCondition_Alloca', 'SaleCondition_Partial']
# Show the test-set columns that are not present in Xnew.columns.
print(drop)
['LotFrontage', 'BsmtFinSF2', 'BsmtUnfSF', 'GrLivArea', 'BsmtFullBath', 'TotRmsAbvGrd', 'GarageYrBlt', 'GarageCars', 'OpenPorchSF', 'EnclosedPorch', 'ScreenPorch', 'MoSold', 'YrSold', 'MSZoning_RH', 'MSZoning_RL', 'Street_Grvl', 'Street_Pave', 'LotShape_IR1', 'LotShape_IR3', 'LotShape_Reg', 'LotConfig_FR2', 'LotConfig_FR3', 'LandSlope_Gtl', 'LandSlope_Mod', 'LandSlope_Sev', 'Neighborhood_Blueste', 'Neighborhood_BrkSide', 'Neighborhood_ClearCr', 'Neighborhood_CollgCr', 'Neighborhood_Gilbert', 'Neighborhood_IDOTRR', 'Neighborhood_MeadowV', 'Neighborhood_Mitchel', 'Neighborhood_NAmes', 'Neighborhood_SWISU', 'Neighborhood_Sawyer', 'Neighborhood_SawyerW', 'Neighborhood_Somerst', 'Neighborhood_Timber', 'Neighborhood_Veenker', 'Condition1_Feedr', 'Condition1_Norm', 'Condition1_PosN', 'Condition1_RRAn', 'Condition1_RRNe', 'Condition1_RRNn', 'Condition2_Artery', 'Condition2_Feedr', 'Condition2_Norm', 'Condition2_PosA', 'Condition2_PosN', 'BldgType_1Fam', 'BldgType_2fmCon', 'BldgType_Twnhs', 'HouseStyle_1.5Fin', 'HouseStyle_1Story', 'HouseStyle_2.5Unf', 'HouseStyle_2Story', 'RoofStyle_Flat', 'RoofStyle_Gable', 'RoofStyle_Gambrel', 'RoofStyle_Hip', 'RoofStyle_Mansard', 'RoofStyle_Shed', 'RoofMatl_Tar&Grv', 'RoofMatl_WdShake', 'RoofMatl_WdShngl', 'Exterior1st_AsbShng', 'Exterior1st_AsphShn', 'Exterior1st_BrkComm', 'Exterior1st_BrkFace', 'Exterior1st_CBlock', 'Exterior1st_CemntBd', 'Exterior1st_MetalSd', 'Exterior2nd_AsbShng', 'Exterior2nd_AsphShn', 'Exterior2nd_Brk Cmn', 'Exterior2nd_BrkFace', 'Exterior2nd_CBlock', 'Exterior2nd_CmentBd', 'Exterior2nd_HdBoard', 'Exterior2nd_MetalSd', 'Exterior2nd_Plywood', 'Exterior2nd_Stone', 'Exterior2nd_Wd Shng', 'MasVnrType_BrkCmn', 'MasVnrType_BrkFace', 'MasVnrType_None', 'ExterQual_Fa', 'ExterQual_Gd', 'ExterQual_TA', 'ExterCond_Ex', 'ExterCond_Fa', 'ExterCond_Gd', 'ExterCond_Po', 'Foundation_CBlock', 'Foundation_PConc', 'Foundation_Wood', 'BsmtQual_Gd', 'BsmtQual_TA', 'BsmtCond_Fa', 'BsmtCond_Po', 'BsmtExposure_Av', 'BsmtFinType1_ALQ', 
'BsmtFinType1_BLQ', 'BsmtFinType1_Rec', 'BsmtFinType1_Unf', 'BsmtFinType2_ALQ', 'BsmtFinType2_BLQ', 'BsmtFinType2_LwQ', 'BsmtFinType2_Rec', 'BsmtFinType2_Unf', 'Heating_GasA', 'Heating_GasW', 'Heating_Grav', 'HeatingQC_Po', 'Electrical_FuseF', 'Electrical_FuseP', 'Electrical_SBrkr', 'KitchenQual_Fa', 'KitchenQual_Gd', 'KitchenQual_TA', 'Functional_Maj1', 'Functional_Maj2', 'Functional_Min1', 'Functional_Min2', 'Functional_Sev', 'FireplaceQu_Ex', 'FireplaceQu_Fa', 'FireplaceQu_Gd', 'FireplaceQu_TA', 'GarageType_2Types', 'GarageType_Basment', 'GarageType_BuiltIn', 'GarageType_CarPort', 'GarageType_Detchd', 'GarageQual_Gd', 'GarageQual_Po', 'GarageQual_TA', 'GarageCond_Ex', 'GarageCond_Fa', 'GarageCond_Gd', 'GarageCond_Po', 'GarageCond_TA', 'PavedDrive_P', 'PoolQC_Ex', 'Fence_GdPrv', 'Fence_GdWo', 'Fence_MnWw', 'MiscFeature_Gar2', 'MiscFeature_Othr', 'MiscFeature_Shed', 'SaleType_COD', 'SaleType_CWD', 'SaleType_Con', 'SaleType_ConLD', 'SaleType_ConLI', 'SaleType_ConLw', 'SaleType_New', 'SaleType_Oth', 'SaleType_WD', 'SaleCondition_Family', 'SaleCondition_Normal']
# Number of feature columns before filtering.
Xnew_ts.shape[1]
268
# Restrict the test features to the columns listed in `keep`.
# NOTE(review): the columns stay in Xnew_ts order, not Xnew order -- confirm
# this matches the column order the model was trained with.
Xnewtest = Xnew_ts.loc[:, keep]
Xnewtest.shape[1]
105
# Display the test frame restricted to the `keep` columns.
Xnewtest
| MSSubClass | LotArea | OverallQual | OverallCond | YearBuilt | YearRemodAdd | MasVnrArea | BsmtFinSF1 | TotalBsmtSF | 1stFlrSF | ... | GarageFinish_Unf | GarageQual_Fa | PavedDrive_N | PavedDrive_Y | PoolQC_Gd | Fence_MnPrv | SaleCondition_Abnorml | SaleCondition_AdjLand | SaleCondition_Alloca | SaleCondition_Partial | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -0.874711 | 0.363929 | -0.751101 | 0.400766 | -0.340945 | -1.072885 | -0.570108 | 0.063295 | -0.370808 | -0.654561 | ... | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| 1 | -0.874711 | 0.897861 | -0.054877 | 0.400766 | -0.439695 | -1.214908 | 0.041273 | 1.063392 | 0.639144 | 0.433298 | ... | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| 2 | 0.061351 | 0.809646 | -0.751101 | -0.497418 | 0.844059 | 0.678742 | -0.570108 | 0.773254 | -0.266876 | -0.574165 | ... | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| 3 | 0.061351 | 0.032064 | -0.054877 | 0.400766 | 0.876976 | 0.678742 | -0.456889 | 0.357829 | -0.271395 | -0.579190 | ... | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| 4 | 1.465443 | -0.971808 | 1.337571 | -0.497418 | 0.679475 | 0.394694 | -0.570108 | -0.387298 | 0.528434 | 0.310192 | ... | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1454 | 2.401505 | -1.591330 | -1.447325 | 1.298950 | -0.044694 | -0.646813 | -0.570108 | -0.965376 | -1.129968 | -1.533893 | ... | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| 1455 | 2.401505 | -1.599808 | -1.447325 | -0.497418 | -0.044694 | -0.646813 | -0.570108 | -0.411477 | -1.129968 | -1.533893 | ... | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 |
| 1456 | -0.874711 | 2.055150 | -0.751101 | 1.298950 | -0.373861 | 0.584059 | -0.570108 | 1.724994 | 0.401907 | 0.169499 | ... | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 |
| 1457 | 0.646389 | 0.125527 | -0.751101 | -0.497418 | 0.679475 | 0.394694 | -0.570108 | -0.224645 | -0.303026 | -0.468645 | ... | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
| 1458 | 0.061351 | -0.038790 | 0.641347 | -0.497418 | 0.712392 | 0.489377 | -0.037980 | 0.700719 | -0.113237 | -0.403324 | ... | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
1459 rows × 105 columns
# Predict on the filtered test features. `cvmodel` is defined outside this
# section (presumably a fitted scikit-learn estimator -- confirm).
pred=cvmodel.predict(Xnewtest)
# Display the predictions.
pred
array([138429.47870899, 181922.3828758 , 202868.94014064, ...,
196078.94599535, 119756.13433631, 222904.36483644])
# Wrap the predictions in a dict under the single key 'Output' and print it.
predn = {'Output': [value for value in pred]}
print(predn)
{'Output': [138429.4787089874, 181922.38287579716, 202868.94014064266, 208819.39668189644, 196351.0740042464, 175876.87481910328, 171840.84378863586, 162109.33245771175, 217398.76465224908, 116522.36785071802, 184373.89166300156, 93185.85826060938, 88185.70322395186, 139578.42495252047, 95311.86454471767, 357983.3862049724, 254734.11956668156, 301680.0166561754, 306996.426854549, 442605.76406786556, 310106.8426112619, 207064.08962930273, 183312.26713828417, 158973.36515351612, 186585.49616143128, 197269.74246785798, 316747.70857901045, 219800.42414222183, 203807.83911753912, 249273.44036504976, 198921.3415624088, 99194.63319666899, 203348.54848009525, 263987.5642483521, 280159.962987258, 244068.69101453235, 177820.69503769258, 166068.76574102088, 156801.67012471744, 148089.5022390158, 181325.88028172875, 142317.15051009695, 226360.77744849265, 244758.88685854065, 231131.56173733296, 199156.618674254, 268538.1335615739, 222324.81312315527, 162138.21168641147, 147865.71180125306, 146438.75466751787, 172584.88088080398, 153983.6808104149, 156370.7181904665, 211972.84534546128, 171247.99467263502, 189413.19077317516, 134272.3638708546, 219143.0567658647, 133762.89815045905, 150684.82850944062, 184990.40253732912, 119535.40914610283, 134221.78617129475, 123590.46332829143, 90909.95470532725, 112838.1240147659, 142486.47204036228, 160429.77426453243, 199608.72893329797, 109169.74998685968, 83396.38548939145, 149091.3835327451, 131271.42093529043, 153579.23503106058, 102235.50550628397, 53860.10836451863, 156364.26870319384, 211481.44623498054, 108719.01640265579, 150695.7949034419, 163970.72051835898, 191834.96757868637, 80596.82878425249, 122254.0733792386, 132474.39208757543, 142815.92195988144, 151159.61927694696, 119972.29955485954, 142182.14881957605, 108491.53768645797, 155245.73725862408, 153695.998517783, 108133.53792153452, 184588.43326195693, 60794.52097317629, 75710.38502719993, 99777.555309386, 73821.38663859945, 104414.9305815761, 122147.67563088117, 
122879.69076955915, 130091.49846283128, 160286.8420323224, 146933.03098924542, 250336.60957381845, 86141.62406183736, 230956.49960938183, 144653.23504051054, 137191.6769136621, 90050.26197831621, 153773.02627507597, 273784.7594791404, 138307.137377411, 222827.71578544116, 252885.28246893443, 188199.73438389797, 158048.77939308208, 138901.8656516143, 196572.94420785684, 165699.46258126668, 128876.36038490168, 285854.72390191373, 224590.8494013509, 132066.41143492877, 54924.44531429828, 98031.28408790412, 169733.77913361852, 102742.91085200408, 127403.70141050358, 93331.99003841226, 123474.91027639671, 119167.06204529278, 160413.2876088457, 119777.76435210902, 234823.8029821, 216262.0992155549, 233171.13192139892, 178949.7897771968, 164849.40758357954, 55978.39170065045, 130950.2470717595, 54562.795991741135, 279590.87709933094, 241219.84107461956, 167972.35788709464, 190898.14103562298, 208236.43100193195, 185953.7579636154, 153100.90846783383, 142323.1280684393, 196230.00709265593, 182140.34746853678, 137637.2195853133, 95502.48712696145, 70443.81123380079, 91191.45642384842, 123703.91548119386, 152765.61284557905, 191595.60956891783, 144391.5763685972, 150208.8247324975, 256974.7740015736, 218200.3960764629, 137235.07424724416, 170307.7388391917, 197027.13091269234, 285069.3856962794, 179432.11681466128, 357424.23796386365, 227712.00130326842, 251497.40843438864, 176670.90446118225, 188766.55218692037, 177312.11950813676, 149254.25481734777, 207907.86109171173, 198340.4488338313, 187906.3923094662, 254280.51213443477, 179594.74318961828, 259109.88434878882, 223727.16225495777, 239769.2726016785, 203931.90421287887, 152561.87456128676, 162644.66578562197, 129423.8770993127, 135636.2979873445, 117401.34762603228, 116920.15903022978, 89162.7330403774, 92618.326175025, 143337.37781415685, 119877.35779827271, 134950.34965314547, 150674.74943881988, 140625.37518365047, 116529.41440579115, 152617.8683051131, 417121.39821140387, 385172.58424333145, 369909.0447555125, 
450225.9141111048, 320232.5893589993, 326459.97796087014, 388466.84446328436, 339572.4655591585, 312381.4255479584, 342699.5869010808, 257941.04521642393, 386944.8625614125, 291702.3256386053, 246076.76974353727, 201576.40241075627, 203465.93511894258, 220602.11366330695, 449677.1857577078, 374521.1036355203, 329929.76694164274, 259400.26174930015, 316668.7025029083, 187803.497988512, 177448.39292743854, 172928.37859446314, 166271.1003146828, 165655.2424729299, 189524.98748213277, 191048.81355442072, 192700.708855324, 181620.47545351196, 266194.36671695573, 171984.16708441262, 185740.3849633059, 164076.14208794083, 273255.15205503604, 168359.71833188363, 329562.5599378821, 286164.10435813287, 260216.018175547, 272902.8082557515, 233546.66998082542, 220947.5505812219, 261236.13280867095, 242270.52123730024, 401674.38162064087, 227834.4594419556, 203241.59470907468, 256352.25288947637, 228219.5662586127, 277578.61252499523, 246933.15750134527, 279158.2571216813, 225439.88884770533, 217121.39137476485, 177539.81030641388, 171549.95713128726, 139144.8763991555, 221260.9342564207, 228780.64516216944, 158549.7056225067, 121765.482703783, 160772.3067795186, 204364.27733140418, 244622.09709575571, 181507.6005057965, 131515.47953702917, 164719.53304469978, 170821.56121789117, 177715.70686499833, 110148.45606121437, 146745.63572795983, 117681.63536123862, 127837.64254301139, 101536.3658051752, 105730.00842961014, 220997.9325256552, 253623.38257031923, 239534.04243322505, 210188.24489826983, 186168.11035863173, 180727.40397832473, 178094.94975848158, 262909.6556043544, 211440.97824693832, 191017.92317171977, 238904.84020179062, 228828.21913386468, 153888.92389727948, 138490.87405815237, 240729.36363899178, 109524.59478181668, 158979.50080283353, 196415.62516401758, 179394.3914623587, 36191.55448418936, 127186.41558924245, 150789.56635909484, 174501.51185627983, 164556.08196090517, 155036.0486237991, 198293.00398122973, 188553.35783443012, 114365.25814924363, 
200635.21921313996, 195910.70281905885, 226913.03298153606, 149523.71805309877, 174874.38865589158, 159956.00981687638, 125816.4739861213, 147887.85265752548, 122901.43753088114, 156823.33950441936, 147866.2148756576, 132551.01893685575, 108276.70414154154, 152519.046183327, 134488.57793160315, 166513.1281296655, 131578.60639746443, 74295.00247835163, 142345.801675918, 92466.49326598799, 137689.3529912228, 108464.21543218491, 150135.28932641432, 34835.3729203433, 103994.81516461048, 61481.0664562959, 217850.08342827685, 164803.95167191327, 130967.6527925059, 161482.80729140504, 144930.22807076105, 145033.24190551694, 129687.77799950597, 118202.32368768586, 109027.62243616415, 118977.72590028122, 139289.0210118525, 117899.05567483777, 156460.37246594005, 131481.05537066306, 135617.6059251635, 126060.88076998547, 147568.93494957953, 125393.28292628637, 128506.61495206412, 135302.35611872625, 75602.12349310328, 120470.23521404486, 129229.83410764953, 93586.74865827282, 47351.73122110705, 102604.96219125105, 115752.00423070084, 172941.29247808945, 140882.51454627482, 31701.357075515058, 93471.23554235791, 151021.66691965744, 23501.62617813512, 129881.91012802455, 147091.60723464517, 94638.12310308027, 102398.72114126298, 93887.43366667932, 111272.58451057199, 155800.921307205, 158375.50779511, 82579.30159071446, 148643.30036670325, 118525.7087054989, 111811.20725602005, 139114.42621050752, 67624.57025435042, 99687.20413948664, 102484.52220772073, 126058.81603687367, 154771.88530741163, 97322.49875895528, 147536.08519548847, 144129.0647543536, 148362.48936351412, 153554.7051379874, 171065.2539408314, 44135.62368854035, 94393.06614495847, 120073.46037596744, 164461.60487729686, 124105.73295635525, 107451.9904094457, 164771.45180038374, 168266.33612182227, 221941.2993238641, 135203.97307443141, 157360.01691138017, 92317.29760187582, 139784.66097539556, 94047.13307779716, 318686.51938343805, 311615.5946641617, 311682.4037077299, 351615.9660099277, 337156.1266762555, 
214954.52946712388, 291947.0678362651, 208247.36712714346, 220111.36776792782, 264286.98174149037, 174564.24101345858, 255857.17395245918, 139189.13969231935, 203134.45991623309, 209515.4085039153, 217200.69670411263, 203192.34404969247, 154905.64513047627, 131535.43958445374, 252825.54917552473, 247857.09649351798, 191739.81824268476, 213218.56865698978, 249585.85653587166, 288442.45314632374, 212513.54132519953, 248667.24939594234, 172970.5084899331, 120521.51026439118, 132366.24486422577, 100587.37323212904, 136001.88765997294, 123220.75171928374, 140544.51109811408, 136439.7054081345, 114373.27754895414, 112784.12062673243, 165673.43209732475, 144622.7199822299, 195511.0537535244, 198075.00313155563, 233779.92047491102, 154405.16010485764, 203627.85120323166, 189301.00094262412, 229841.68118760464, 107211.85634021979, 122121.56049959129, 117792.50172657685, 235063.40112237266, 317551.9070066282, 163357.29856745835, 64401.31489398131, 297081.2291386338, 66615.75943047294, 246466.99399849656, 142269.06646419264, 175919.16525024283, 176357.648500298, 381997.7291962757, 334546.77840783563, 238143.16071765427, 215178.05466501976, 203324.26229819236, 381870.85692429286, 141732.40881624274, 177102.52331675973, 144019.97648989275, 125331.44541698895, 144043.47933495115, 145023.15907325668, 197659.80071695548, 185910.84143835117, 175120.21212628746, 213190.25416544953, 187394.75401502167, 177635.4580228433, 247867.97208849495, 191462.12118342644, 180701.7848374815, 177031.15511708736, 227020.3251234871, 389259.1216015241, 397932.31958114356, 170468.7988067278, 348868.8066709138, 244516.3938128702, 249511.26531479257, 193648.91944047075, 255455.605195618, 213627.0366068422, 121074.2292614466, 180936.88707861817, 134006.82579026523, 282412.4757696483, 158582.73587247572, 268190.0278730227, 151458.63774400784, 106143.83297815172, 119899.91408446936, 88721.75797738906, 105645.51454723132, 106301.1071692459, 132826.45625321008, 132117.23334481934, 302486.8487801247, 
399414.58390756766, 371497.43785492424, 399428.3697664682, 426506.40727862227, 369524.2544639041, 281381.251722747, 342708.09379677055, 448011.4117572468, 262165.44382581604, 341917.29964322184, 347892.982684918, 308603.6281021519, 207777.87267457967, 340425.6965231652, 212046.74880517123, 201481.11800643528, 175169.37546905736, 218186.35092013175, 210722.41931299312, 202892.91291774274, 169840.83234344213, 193760.28801021515, 210130.573789599, 233115.29710737302, 224395.71901733946, 169919.27192430914, 253392.85049975087, 186397.74010079983, 240940.05710132772, 301121.6532435617, 302672.90274331963, 290886.79225945077, 300010.40899109415, 261205.24270568154, 250530.84777619847, 243158.94961150724, 268512.49800795247, 233073.26936316217, 227632.9549625179, 236170.55191294695, 227797.30509724433, 197048.22069375147, 195149.46044638313, 138997.49867625427, 162579.9784610229, 187033.80034872342, 192375.85767115757, 222625.1064614572, 197998.81265946198, 197851.78383190278, 102801.97746827123, 141552.43043323493, 55532.89889307605, 90459.21654457913, 195130.31010070076, 147015.38334247543, 266545.24014003365, 329347.69409680495, 172256.872798701, 157450.7224540388, 152767.33149478797, 175970.19294988248, 260226.13471168114, 235015.7262834443, 256991.3354256758, 249415.644646206, 169584.0560419215, 234639.72689725415, 194492.71545014292, 198842.5697846709, 289055.0537552781, 204751.7719298213, 314419.7860925276, 289548.26966100244, 214796.1715603831, 181602.7892846888, 178922.58070091036, 209345.72983640333, 150508.32041849272, 156870.7695103031, 136293.16561239943, 146504.92168892777, 173247.4721168264, 100420.81827066214, 107620.73795136712, 153885.2513885084, 79292.26718545465, 165266.2500208295, 129548.77356742903, 105964.21942893325, 216508.22481100596, 133529.60430031831, 177627.4341555335, 197063.37990430117, 138596.3288644447, 124441.73666750474, 152021.28175116767, 132962.52116339596, 174799.16213098972, 121674.69034674516, 161562.1716570266, 
104314.90795296044, 107523.30877889728, 87459.75485247225, 143642.07976490678, 136365.323062294, 176406.88420089456, 180816.62885967747, 129360.96031906025, 154889.00862701045, 124933.84173252151, 139594.45766026538, 127748.55504736176, 134640.7146472022, 135791.92066547266, 160798.0328853391, 113146.07431470673, 120741.7693228291, 115001.07769565894, 119775.40594739858, 100795.2840663769, 77510.98608030658, 122900.2797588287, 94267.95603702412, 113552.05531088251, 142404.81577573868, 114633.04392102559, 142179.0803233947, 68102.9977076598, 92572.2736507081, 176031.6636534765, 27463.570292582648, 73033.63364150637, 103310.54646559648, 115582.19002962207, 100732.88623145944, 149340.7572972442, 139164.39085927757, 39243.76439813594, 198730.59021824383, 129615.88895335414, 119118.07265919192, 125587.66275438036, 149587.2679873916, 143747.89259715413, 120013.10277584904, 109642.50660756098, 171930.82739334583, 110963.87287621095, 162750.81326888286, 133948.39056645273, 107609.9758819217, 105342.54708320393, 114639.41938113843, 118442.44963986876, 62222.792930974654, 167998.1428648369, 130989.6097214974, 140895.7427373501, 179717.10892686073, 137757.44916427188, 98118.04928239464, 153063.57447682144, 119262.0356506238, 109509.37424780827, 126486.1173101067, 136746.94929935542, 115607.62748736342, 52201.875983979524, 116821.62586100842, 137858.82524586818, 146582.76324229746, 136322.8155009722, 168382.89295415612, 128039.05173667414, 133246.60616895728, 88925.41270249635, 145529.20113969527, 191785.36736177217, 98222.96487275104, 135951.27906118304, 144839.76150655156, 243162.81046060685, 117683.38232907944, 198395.31168425624, 170893.91763795126, 106059.9562548733, 147123.3108342794, 260179.93037898382, 231963.62661341947, 246178.20597839856, 208353.0422652168, 189787.94111736427, 230664.10504952117, 367239.26177103596, 347600.65093179466, 236149.562895242, 193292.18312942446, 160231.29993884172, 231904.46020181413, 214216.23650854273, 201084.77243551024, 
228226.03228225495, 153808.15861286895, 141078.61636944953, 165719.8103252749, 227744.90290441108, 265943.5408328518, 309101.01997008093, 246686.39166818722, 215670.1193832975, 129144.42535349948, 236933.00588698185, 198995.21356525226, 226419.5727668696, 194290.27690781662, 120118.75600044773, 125377.33144371567, 161339.11710059587, 146593.20488198486, 151993.74299561873, 358838.0317496562, 76517.03288718229, 80617.3225434727, 54253.09490099475, 138655.0296312985, 100956.42561142328, 117552.530475188, 107220.38596331837, 122208.02074078948, 160275.86185012653, 182087.67893365314, 152195.70257860294, 163391.36275496174, 203393.295008815, 152036.86466486775, 202160.83784313762, 145445.892705295, 156758.34128082034, 204475.53735983232, 261542.04186195374, 273543.5384706357, 125081.09996256932, 117193.67182988509, 131618.90826507684, 108869.56210962395, 120146.59081688045, 100638.22316088666, 174244.8857480114, 79916.65202789569, 69720.30753985314, 67886.95508109637, 55898.147332809196, 298208.67038415046, 306177.0740172246, 297217.92449407576, 217205.24533260264, 135894.74861289776, 202011.36677992198, 188875.4348645324, 280136.3476065157, 257847.96653787146, 153096.3848311007, 223874.65257338592, 190667.68760303984, 198811.27984629583, 244824.0313451952, 234458.66232419456, 253635.81255032468, 299715.848588374, 186619.78223450034, 120743.40212048584, 166312.5542308453, 147894.7051254941, 130617.69627389591, 131997.7968104132, 103411.812878212, 85625.68118145775, 146590.2721675765, 124092.77806330207, 132420.00447776806, 124243.91942108335, 133617.90068812386, 178323.25700543952, 202579.07494740016, 158886.65512165462, 175596.10737456678, 193382.5274249134, 171123.144911882, 223013.4753312911, 153918.42220266402, 179122.8164508947, 151624.71773809323, 212319.6643574636, 241242.08960095132, 381283.9359330544, 479182.17877912614, 175550.30013877203, 322135.72658766655, 385024.807550395, 408743.5884153809, 154753.17654619663, 195351.84139102924, 224039.6181363537, 
183370.83518623788, 154550.32302384303, 194121.9821661367, 175622.6136172666, 203606.43504716124, 190159.0331991538, 154876.18079408287, 140537.18062877626, 120550.43694351442, 152853.34516977132, 181131.77006385144, 104155.29057515261, 120169.61942807103, 145182.280532402, 125150.07825761207, 367479.37180922984, 287746.488212383, 346809.9278893401, 434706.986782721, 342197.8175275045, 396843.4882134609, 444101.27322378196, 386444.0164313781, 438404.9199889888, 288714.75208444835, 370281.9944520608, 353450.20650876855, 348906.39119395637, 327976.4382657273, 326894.8363166747, 252743.626237532, 244397.51578924, 255316.3415453781, 201519.20104408893, 191381.93448090032, 199114.2332071726, 212106.52193880652, 295570.5476986, 211232.24368955955, 205191.67607183626, 202784.32132048998, 176511.44130283588, 201129.46988652452, 186614.65495062433, 202765.58273876234, 198239.22918691766, 192067.5990034392, 190319.29506574618, 185632.699430741, 230166.47659640887, 192160.21136947247, 197726.33273193036, 181078.34607205825, 217389.7555998438, 172348.12683740474, 203850.26533091825, 230660.26664576028, 199766.7181135756, 191781.92206047973, 320704.28689733136, 376235.10422798095, 309699.49742121034, 259290.41152462788, 281772.7518251694, 304381.8977116483, 201839.12009329413, 258787.150846866, 211008.39539800683, 374746.70944746304, 211608.82737323048, 222495.35816139294, 220134.61587973495, 217313.19269570068, 219102.97838416422, 221387.67349143722, 205342.85812112456, 252410.12269906915, 204285.42334143427, 322020.7183450103, 273142.71402870095, 242050.50068421243, 255468.5500980594, 148492.84095313394, 144127.54083126457, 153247.53201508668, 185088.12411201218, 199871.70324813452, 126955.8628805514, 106129.60761638155, 149897.0568603247, 271041.54813202564, 144582.61255924354, 167731.73106543717, 206694.86956779513, 184368.66813848412, 212028.76094941457, 219532.03624580542, 203459.43213765166, 169692.12425778835, 172970.1212817037, 198974.36579117214, 287640.61389435217, 
312532.9514125676, 195914.37621308383, 281323.7879556763, 332130.72815202095, 142751.69792631172, 223885.54199808807, 138815.54645441356, 159389.38432348942, 195370.6910853681, 198991.83918230908, 248306.04641513503, 160212.21294003318, 122919.88895824263, 136914.8471725338, 107479.88989534392, 113030.35466647969, 150362.81681228834, 148889.5587027578, 122336.57950094434, 164620.5578070357, 151068.90227306163, 216189.71601170834, 140925.09303434193, 228505.15127612016, 122000.4419587551, 49831.4697283869, 51726.280720195675, 125967.20423822216, 123135.27715202075, 152102.01678111553, 155352.65094345622, 149307.73945747383, 81673.29442118375, 140303.72893098384, 153432.1881297266, 120971.8032969325, 190708.63634219606, 114457.96726618815, 170632.12672567595, 135976.1898746389, 168042.10683685914, 142679.35124530847, 139513.73255852994, 127702.8878664004, 124937.62037786123, 134455.2723906892, 123644.98585200362, 142371.9182692662, 111282.59856877034, 122068.3479020314, 139082.98729349885, 242283.53441493545, 127905.68403636603, 126895.71942160984, 180008.59458671126, 107019.52071677287, 136638.89479005174, 103445.52247675297, 148041.81076362045, 142863.58125913976, 144392.46575217028, 165041.107652188, 120860.54276606809, 108197.7015483076, 116060.33337623032, 90005.98416568211, 130092.8549179791, 96309.43117420172, 92574.28181266735, 135116.81103196027, 130685.24744002642, 80381.92859908173, 144815.14270623866, 188519.44716388156, 131801.9416947797, 110510.55403771321, 171142.66710819598, 125877.88417970031, 209734.63013474218, 93170.93858600974, 120604.92660942551, 79460.96280625368, 167291.67860155582, 130063.78225635845, 134323.63248711405, 111884.63069312452, 139366.16034212967, 120831.88531768674, 130785.82756550334, 120532.95124185547, 195634.86221957515, 122234.5104458582, 117114.50491916906, 125088.25444636957, 95522.63666258394, 98849.20461204533, 217303.28524687735, 212840.66083243457, 201959.82301365433, 102337.70324220472, 72458.90719618049, 
229334.41411041727, 85986.76557015684, 98157.09673266819, 156204.4128525215, 117457.21224408166, 160750.5640015619, 117414.43410114633, 96189.59650181116, 101627.22796616153, 119967.52478943615, 118765.31140029663, 149383.61298725504, 189526.6621487761, 156666.73351392764, 158336.83286827337, 142519.75466090534, 98593.33912143968, 177931.53993731955, 154036.42267690046, 161251.4511642059, 92102.46065565437, 221681.12128926296, 174886.13241769292, 115810.92514651339, 94135.71219188115, 105394.54126094935, 148595.05578417872, 151841.2218037871, 90206.55606835423, 207431.91852099454, 216307.00617818805, 244868.5349671353, 275751.0945082153, 247988.7436536604, 213927.8780019587, 214002.66382210449, 171314.4152225218, 219036.89949559598, 214513.79165613413, 236110.2957085833, 163667.0771046738, 176105.2181313792, 154184.86293097705, 152744.52772134016, 249539.54891547762, 226063.91672580806, 189626.77197337413, 224775.2599771291, 122603.68578899233, 138538.7333757051, 152233.90520751284, 158124.4218331888, 122802.09916363825, 119293.18524261794, 147864.7087771123, 121601.82603893954, 259298.93869248132, 228668.15199898044, 198107.12368148362, 230717.59517247768, 283169.62071148737, 233846.67404248414, 251277.99327507938, 188479.2961389908, 188363.21806036192, 180781.28007992436, 179195.35829609388, 160490.85977738272, 126968.68381379744, 121126.56751189235, 142639.99611990788, 122743.67050600931, 138204.70412299634, 178559.71976288868, 163615.66800391613, 701156.765538261, 139920.41761457315, 130325.83512693213, 64773.587578726176, 83784.2285135311, 104879.20207959722, 103510.8106161064, 120123.57616436724, 176285.34874233027, 153620.75329614404, 168569.85649360484, 158007.02683339367, 153376.1469662155, 168877.64834343677, 193863.61217893174, 151697.31293009082, 177415.46858627157, 120223.57715361458, 218146.8674978097, 239298.24098789567, 117267.20126432633, 219743.88297312235, 156765.34800398335, 237494.6444048066, 288372.7236191947, 140900.314455415, 
72757.51149717253, 147385.3017619327, 75111.89895621527, 49987.745372774996, 102673.6885584925, 129773.3515357665, 100084.96545038045, 288012.3855110599, 193596.1818191843, 194556.5504929573, 228639.557940948, 213391.48661085602, 140313.4969314411, 155934.84761981427, 196131.4655043463, 216262.45922282967, 217365.48962944795, 266793.0328757527, 182766.66583829094, 213053.74607855972, 272300.8105414444, 197647.22857608393, 288203.53475885454, 337667.3454418628, 212547.77228126698, 146569.22999881307, 72456.8689047016, 90539.95742908494, 86360.62279414793, 65943.25639778457, 147687.99994954304, 228269.37387424192, 206496.62920422264, 155586.53809486298, 110367.88777396374, 173453.36748014676, 152787.06266924992, 133526.45822447503, 123264.12212678549, 170293.98370109888, 142757.28284215333, 217618.95617047395, 275614.4330308341, 199026.41940570634, 195429.89109219395, 179824.2582143552, 189450.18471460917, 241069.19103935754, 286891.65161887393, 291686.55463747296, 168982.60578596027, 175520.820821602, 447591.21523092675, 490277.2928912849, 380956.8440650725, 446503.7601083183, 408563.94468557334, 316190.4015013628, 415808.2110628723, 153139.3997165034, 177931.73340394592, 206575.62390434943, 275651.5343219107, 181065.83709960285, 150794.56073721085, 97907.09413973909, 186209.05372208153, 102234.03058285851, 108887.50669068188, 103071.02389621155, 86636.04373292232, 101013.20254676558, 142409.96201607646, 152220.27269719445, 120362.8747689726, 130187.06449785706, 392944.64174619515, 252480.44286834646, 273923.39803160506, 389071.58889635454, 313492.8863713227, 359915.32762092364, 331702.32412109256, 321731.82687504624, 360820.16285757895, 356376.17777633015, 367820.7628518151, 279442.5861199772, 285140.54898573144, 340066.6648182479, 283901.90979138564, 181263.81358488288, 188554.09060688585, 188386.62203504506, 270620.5482414162, 188692.2836755924, 193289.11369480233, 197422.39306380588, 198095.20472162875, 183284.27770817355, 189779.0848416425, 204276.47048315304, 
255636.4463451869, 267204.83742991416, 275999.50572589133, 380423.67580339254, 301501.2414826021, 452602.8494790476, 304431.1231723955, 296121.82364301675, 259984.63430160374, 303295.73551216733, 213202.89043337444, 218986.198674254, 396307.38920692937, 200434.6502980589, 146485.62397018823, 206756.69663424834, 142778.7998840179, 188338.39179845358, 180417.14988865994, 193978.4898227997, 194825.6852878154, 164336.70763798218, 141562.39851011432, 138578.97023013065, 99324.73809727357, 117997.1794880351, 140230.5629389141, 121774.98500952034, 90562.8839483646, 107797.66790901353, 141278.19987579557, 105877.69332095784, 136674.5486688003, 263041.27111050003, 364753.9452677185, 160144.932553178, 140954.81279999044, 179041.59458977348, 134058.30729588642, 195597.36371704488, 215895.2730557636, 127789.93884246066, 165060.36064649702, 140878.07863069436, 156268.5896515687, 150663.27265455533, 136659.70943285845, 126497.68777666989, 151666.79141522403, 141078.37235013754, 168808.27344893554, 153113.9335107769, 155179.796231012, 136254.16642568793, 135279.88765275705, 157155.11293582665, 152963.87719816322, 139038.35903351847, 145233.88963091944, 120364.80921359395, 134742.88073758213, 153647.12380012465, 143111.9912849671, 153068.70334245748, 161868.03100886568, 154702.54349931146, 158688.48528416667, 140944.05363706325, 150856.15559950273, 147568.2366670819, 105746.93297921451, 126995.2144341195, 122493.61121609717, 137860.51710229748, 200556.07704101258, 64726.772736860774, 241696.23604502014, 142948.377716215, 84219.37811992301, 57353.226243885816, 67034.23472451983, 160418.35443062935, 132813.9713964555, 140380.26455144674, 143400.84127476034, 184055.81887770962, 152649.0912792618, 296201.64887169003, 158154.34206929588, 93954.68118211374, 120728.93031846322, 137024.46522218134, 137359.87057595217, 99201.09983218079, 81693.00376044045, 175944.17260347956, 163690.25507156365, 119520.81529486913, 143875.738922194, 155239.18174704447, 121989.1710724521, 
118946.79108510469, 98850.55887950966, 92790.40987850018, 95999.07109962548, 87493.02938836819, 99102.50888656071, 136844.1257397713, 52574.69934081021, 130906.12188186386, 67144.02786706992, 165152.77309915412, 96560.74122904714, 113967.884794089, 40717.92802351952, 165765.99242292336, 89149.95801904338, 114126.15598869117, 93534.09181417814, 275530.88027190446, 112708.26496230916, 116428.57366503109, 68327.55633655681, 112605.25803559413, 134289.95271062982, 181125.3051434337, 138769.40274839848, 115151.52672857908, 62152.14042606461, 163386.22499346253, 155251.8306778763, 130452.05153303975, 118362.8554811048, 157160.92638454185, 172518.55475947505, 156621.75839719296, 158570.29887832655, 104345.51639109678, 221670.21729154186, 143674.21145468083, 137631.34875631554, 152937.59781549068, 144436.37589417305, 94380.7587456967, 199131.63507841522, 368665.46495252405, 184445.41210521135, 151526.43379804303, 142121.9532831739, 142227.29571718877, 242884.85485821468, 188382.83846184553, 232323.82478164783, 184499.39060951263, 258128.2294930439, 312613.1245550384, 237339.41209869657, 227312.75326894768, 194082.434405691, 161756.2712535808, 147136.8907409109, 184850.9661957688, 206057.68195981305, 210776.79437763646, 231461.83593437847, 169443.1490255001, 167979.01855774497, 122440.1718477832, 217274.05192358387, 216864.46540315868, 218624.7251046415, 203032.82814575458, 267769.7415691807, 232222.06274020358, 226904.67722060857, 235295.7894917994, 132727.174632166, 202767.53885580593, 204076.25563043874, 191771.50314169703, 208078.81535671995, 96619.45787562866, 135642.34748469264, 140844.10652705244, 193283.03503116168, 140676.97937635297, 253196.85821965965, 144917.62520178815, 145027.37225614063, 90408.87800190836, 103019.68497988798, 107073.67646613298, 142670.18732720602, 61616.695506469536, 30324.36887887714, 94542.29430108203, 152105.29938538818, 107052.80794320175, 169481.19962689054, 154442.94822600015, 180116.8898543797, 146656.48442787863, 108123.59999049442, 
159762.76339561166, 202218.47596971004, 205601.14659464, 203496.41609014367, 191163.0759672394, 257864.73605832632, 119619.75644580876, 147957.459073189, 36629.56996691071, 83717.46209540308, 165641.93072052786, 30276.621805440125, 46220.80783560721, 50552.479847571, 337400.36010115047, 294457.09440690547, 245582.07162181838, 151678.98120989362, 214351.10805479222, 161987.4474652591, 243144.13072270897, 200616.00859926708, 313780.35687637853, 330191.7654350875, 88763.81397332667, 203758.73645955036, 112259.60771355688, 122385.44742968096, 159279.7743693715, 66700.98302041322, 78203.45085746724, 153033.48208572698, 77092.49729270308, 64136.51587698239, 79205.20192973368, 81236.38607881655, 196078.94599535485, 119756.1343363137, 222904.36483643937]}
# Sanity-check the size of the prediction container.
# NOTE(review): the recorded result is 1, so this counts the top-level entries
# of `predn` (it appears to be a dict with a single 'Output' key), not the
# number of predicted rows -- the frame built from `predn` has 1459 rows.
len(predn)
1
# Keep the Id column of df2 as a Series; it labels each prediction row.
# (df2 is defined in an earlier cell -- presumably the test set; confirm there.)
ID = df2["Id"]
ID
0 1461
1 1462
2 1463
3 1464
4 1465
...
1454 2915
1455 2916
1456 2917
1457 2918
1458 2919
Name: Id, Length: 1459, dtype: int64
# Promote the Id Series to a one-column DataFrame (the column keeps the
# Series name, 'Id') so the predictions can be attached alongside it.
df_new = ID.to_frame()
df_new
| Id | |
|---|---|
| 0 | 1461 |
| 1 | 1462 |
| 2 | 1463 |
| 3 | 1464 |
| 4 | 1465 |
| ... | ... |
| 1454 | 2915 |
| 1455 | 2916 |
| 1456 | 2917 |
| 1457 | 2918 |
| 1458 | 2919 |
1459 rows × 1 columns
# Turn the prediction container into a DataFrame; each key of `predn`
# becomes a column (the displayed result has a single 'Output' column).
df_news = pd.DataFrame(data=predn)
df_news
| Output | |
|---|---|
| 0 | 138429.478709 |
| 1 | 181922.382876 |
| 2 | 202868.940141 |
| 3 | 208819.396682 |
| 4 | 196351.074004 |
| ... | ... |
| 1454 | 79205.201930 |
| 1455 | 81236.386079 |
| 1456 | 196078.945995 |
| 1457 | 119756.134336 |
| 1458 | 222904.364836 |
1459 rows × 1 columns
# Attach the predictions to their Ids. `join` aligns on the row index and
# keeps every row of the left frame; both frames show the same 0..1458
# index in their displayed output, so rows pair up positionally.
df_final = df_new.join(df_news, how="left")
df_final
| Id | Output | |
|---|---|---|
| 0 | 1461 | 138429.478709 |
| 1 | 1462 | 181922.382876 |
| 2 | 1463 | 202868.940141 |
| 3 | 1464 | 208819.396682 |
| 4 | 1465 | 196351.074004 |
| ... | ... | ... |
| 1454 | 2915 | 79205.201930 |
| 1455 | 2916 | 81236.386079 |
| 1456 | 2917 | 196078.945995 |
| 1457 | 2918 | 119756.134336 |
| 1458 | 2919 | 222904.364836 |
1459 rows × 2 columns